diff --git a/hyperpars/ppo-caribou.yml b/hyperpars/ppo-caribou.yml index b3aa2a7..62faca4 100644 --- a/hyperpars/ppo-caribou.yml +++ b/hyperpars/ppo-caribou.yml @@ -1,12 +1,47 @@ # stable-baselines3 configuration +# algo: "PPO" +# env_id: "CaribouScipy" +# config: {} +# n_envs: 12 +# tensorboard: "../../../logs" +# total_timesteps: 1000000 +# use_sde: True +# repo: "boettiger-lab/rl4eco" +# save_path: "../saved_agents" +# id: "3" + algo: "PPO" +total_timesteps: 5000000 +algo_config: + tensorboard_log: "../../../logs" + # + policy: 'MlpPolicy' + # batch_size: 512 + # gamma: 0.9999 + # learning_rate: !!float 7.77e-05 + # ent_coef: 0.00429 + # clip_range: 0.1 + # gae_lambda: 0.9 + # max_grad_norm: 5 + # vf_coef: 0.19 + # use_sde: True + # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])" + # in policy_kwargs: net_arch=[400, 300] + # policy: 'MlpPolicy' + # use_sde: True + # policy_kwargs: "dict(log_std_init=-3, net_arch=[400, 300])" + # clip_range: 0.1 + +# env env_id: "CaribouScipy" config: {} n_envs: 12 -tensorboard: "/home/rstudio/logs" -total_timesteps: 500000 -use_sde: True -repo: "boettiger-lab/rl4eco" + +# io +repo: "cboettig/rl-ecology" save_path: "../saved_agents" -id: "2" \ No newline at end of file + +# misc +# id: "" +additional_imports: ["torch"] \ No newline at end of file diff --git a/hyperpars/tqc-caribou.yml b/hyperpars/tqc-caribou.yml index bacbc98..3cf4085 100644 --- a/hyperpars/tqc-caribou.yml +++ b/hyperpars/tqc-caribou.yml @@ -1,12 +1,47 @@ # stable-baselines3 configuration +# algo: "TQC" +# env_id: "CaribouScipy" +# n_envs: 12 +# tensorboard: "/home/rstudio/logs" +# total_timesteps: 500000 +# config: {} +# use_sde: True +# repo: "boettiger-lab/rl4eco" +# save_path: "../saved_agents" +# id: "2" + algo: "TQC" +total_timesteps: 5000000 +algo_config: + tensorboard_log: "../../../logs" + # + policy: 'MlpPolicy' + # batch_size: 512 + # gamma: 0.9999 + # learning_rate: !!float 7.77e-05 + # ent_coef: 0.00429 + # 
clip_range: 0.1 + # gae_lambda: 0.9 + # max_grad_norm: 5 + # vf_coef: 0.19 + # use_sde: True + # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])" + # in policy_kwargs: net_arch=[400, 300] + # policy: 'MlpPolicy' + # use_sde: True + # policy_kwargs: "dict(log_std_init=-3, net_arch=[400, 300])" + # clip_range: 0.1 + +# env env_id: "CaribouScipy" -n_envs: 12 -tensorboard: "/home/rstudio/logs" -total_timesteps: 500000 config: {} -use_sde: True -repo: "boettiger-lab/rl4eco" +n_envs: 12 + +# io +repo: "cboettig/rl-ecology" save_path: "../saved_agents" -id: "2" \ No newline at end of file + +# misc +# id: "" +additional_imports: ["torch"] \ No newline at end of file diff --git a/notebooks/optimal-fixed-policy-cont.ipynb b/notebooks/optimal-fixed-policy-cont.ipynb new file mode 100644 index 0000000..5faf5b6 --- /dev/null +++ b/notebooks/optimal-fixed-policy-cont.ipynb @@ -0,0 +1,1636 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "287ae04f-d0eb-45ae-85b3-2161c80fc115", + "metadata": {}, + "source": [ + "# Optimizing fixed policies for continuous-time system" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b9691ca4-5cbc-43dd-bbb3-c385836a2c5d", + "metadata": {}, + "outputs": [], + "source": [ + "from rl4caribou import CaribouScipy as carib\n", + "from rl4caribou.agents import constAction\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from plotnine import ggplot, aes, geom_point, geom_ribbon, geom_density, geom_line\n", + "import polars as pl\n", + "from skopt import gp_minimize, gbrt_minimize\n", + "from skopt.plots import plot_objective, plot_convergence" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "ac439781-ac01-4649-8830-6e5379bc3705", + "metadata": {}, + "outputs": [], + "source": [ + "import ray\n", + "\n", + "@ray.remote\n", + "def gen_ep_rew(manager, env):\n", + " episode_reward = 0.0\n", + " observation, _ = env.reset()\n", + " for t in range(env.Tmax):\n", 
+ " action, _ = manager.predict(observation)\n", + " observation, reward, terminated, done, info = env.step(action)\n", + " episode_reward += reward\n", + " if terminated or done:\n", + " break\n", + " return episode_reward\n", + "\n", + "def gather_stats(manager, env, N=200, return_ep_rewards=False):\n", + " results = ray.get(\n", + " [gen_ep_rew.remote(manager, env) for _ in range(N)]\n", + " )\n", + " ray.shutdown()\n", + " # results = [gen_ep_rew(manager, env) for _ in range(N)]\n", + " #\n", + " if return_ep_rewards:\n", + " return results\n", + " y = np.mean(results)\n", + " sigma = np.std(results)\n", + " ymin = y - sigma\n", + " ymax = y + sigma\n", + " return y, ymin, ymax " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "710a3e0c-1965-427c-8b42-01e2eddddf8e", + "metadata": {}, + "outputs": [], + "source": [ + "# pacifist = constAction(mortality_vec=np.array([0.0,0.0,0.0]))\n", + "# gather_stats(pacifist, carib())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "63db8123-9eba-41d4-bafe-264a80963ade", + "metadata": {}, + "outputs": [], + "source": [ + "CONFIG = {}\n", + "\n", + "def g(x):\n", + " manager = constAction(x)\n", + " out = gather_stats(manager, carib(config=CONFIG))\n", + " return - out[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9b68f22e-a7fe-43f0-9dc6-6ccc5f8d007e", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 1 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:35:28,931\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 1 ended. Evaluation done at random point.\n", + "Time taken: 7.2761\n", + "Function value obtained: 854.4719\n", + "Current minimum: 854.4719\n", + "Iteration No: 2 started. 
Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:35:36,220\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 2 ended. Evaluation done at random point.\n", + "Time taken: 7.2402\n", + "Function value obtained: 807.4576\n", + "Current minimum: 807.4576\n", + "Iteration No: 3 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:35:43,438\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 3 ended. Evaluation done at random point.\n", + "Time taken: 7.3030\n", + "Function value obtained: 690.1642\n", + "Current minimum: 690.1642\n", + "Iteration No: 4 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:35:51,794\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 4 ended. Evaluation done at random point.\n", + "Time taken: 8.3079\n", + "Function value obtained: 1011.0399\n", + "Current minimum: 690.1642\n", + "Iteration No: 5 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:35:59,077\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 5 ended. Evaluation done at random point.\n", + "Time taken: 7.6676\n", + "Function value obtained: 425.6824\n", + "Current minimum: 425.6824\n", + "Iteration No: 6 started. 
Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:06,866\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 6 ended. Evaluation done at random point.\n", + "Time taken: 7.8028\n", + "Function value obtained: 824.5336\n", + "Current minimum: 425.6824\n", + "Iteration No: 7 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:14,615\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 7 ended. Evaluation done at random point.\n", + "Time taken: 7.4041\n", + "Function value obtained: 478.3516\n", + "Current minimum: 425.6824\n", + "Iteration No: 8 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:21,976\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 8 ended. Evaluation done at random point.\n", + "Time taken: 7.5963\n", + "Function value obtained: 945.0113\n", + "Current minimum: 425.6824\n", + "Iteration No: 9 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:29,588\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 9 ended. Evaluation done at random point.\n", + "Time taken: 7.3435\n", + "Function value obtained: 952.3377\n", + "Current minimum: 425.6824\n", + "Iteration No: 10 started. 
Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:36,936\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 10 ended. Evaluation done at random point.\n", + "Time taken: 7.6181\n", + "Function value obtained: 682.9579\n", + "Current minimum: 425.6824\n", + "Iteration No: 11 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:44,629\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 11 ended. Search finished for the next optimal point.\n", + "Time taken: 7.8691\n", + "Function value obtained: 616.3808\n", + "Current minimum: 425.6824\n", + "Iteration No: 12 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:36:52,464\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 12 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1113\n", + "Function value obtained: 442.6618\n", + "Current minimum: 425.6824\n", + "Iteration No: 13 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:00,564\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 13 ended. Search finished for the next optimal point.\n", + "Time taken: 7.8246\n", + "Function value obtained: 428.3955\n", + "Current minimum: 425.6824\n", + "Iteration No: 14 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:08,359\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 14 ended. Search finished for the next optimal point.\n", + "Time taken: 7.8876\n", + "Function value obtained: 481.3296\n", + "Current minimum: 425.6824\n", + "Iteration No: 15 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:16,256\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 15 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0396\n", + "Function value obtained: 406.1676\n", + "Current minimum: 406.1676\n", + "Iteration No: 16 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:24,308\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 16 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9728\n", + "Function value obtained: 377.4997\n", + "Current minimum: 377.4997\n", + "Iteration No: 17 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:32,352\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 17 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1355\n", + "Function value obtained: 344.7973\n", + "Current minimum: 344.7973\n", + "Iteration No: 18 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:40,394\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 18 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1918\n", + "Function value obtained: 313.7336\n", + "Current minimum: 313.7336\n", + "Iteration No: 19 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:48,620\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 19 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9072\n", + "Function value obtained: 297.7170\n", + "Current minimum: 297.7170\n", + "Iteration No: 20 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:37:56,585\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 20 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1385\n", + "Function value obtained: 250.2561\n", + "Current minimum: 250.2561\n", + "Iteration No: 21 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:04,669\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 21 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9760\n", + "Function value obtained: 155.7173\n", + "Current minimum: 155.7173\n", + "Iteration No: 22 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:12,650\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 22 ended. Search finished for the next optimal point.\n", + "Time taken: 7.8007\n", + "Function value obtained: 196.2027\n", + "Current minimum: 155.7173\n", + "Iteration No: 23 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:20,440\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 23 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9883\n", + "Function value obtained: 356.1454\n", + "Current minimum: 155.7173\n", + "Iteration No: 24 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:28,406\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 24 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9125\n", + "Function value obtained: 144.7521\n", + "Current minimum: 144.7521\n", + "Iteration No: 25 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:36,434\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 25 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1715\n", + "Function value obtained: 153.4413\n", + "Current minimum: 144.7521\n", + "Iteration No: 26 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:44,524\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 26 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0445\n", + "Function value obtained: 154.4670\n", + "Current minimum: 144.7521\n", + "Iteration No: 27 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:38:52,585\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 27 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9848\n", + "Function value obtained: 128.6586\n", + "Current minimum: 128.6586\n", + "Iteration No: 28 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:00,597\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 28 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9924\n", + "Function value obtained: 133.4479\n", + "Current minimum: 128.6586\n", + "Iteration No: 29 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:08,585\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 29 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0684\n", + "Function value obtained: 123.6162\n", + "Current minimum: 123.6162\n", + "Iteration No: 30 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:16,660\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 30 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9680\n", + "Function value obtained: 102.7805\n", + "Current minimum: 102.7805\n", + "Iteration No: 31 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:25,674\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 31 ended. Search finished for the next optimal point.\n", + "Time taken: 9.0779\n", + "Function value obtained: 96.8023\n", + "Current minimum: 96.8023\n", + "Iteration No: 32 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:34,722\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 32 ended. Search finished for the next optimal point.\n", + "Time taken: 9.2097\n", + "Function value obtained: 94.4083\n", + "Current minimum: 94.4083\n", + "Iteration No: 33 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:42,904\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 33 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9212\n", + "Function value obtained: 93.7777\n", + "Current minimum: 93.7777\n", + "Iteration No: 34 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:50,894\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 34 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0790\n", + "Function value obtained: 85.1689\n", + "Current minimum: 85.1689\n", + "Iteration No: 35 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:39:58,895\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 35 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9322\n", + "Function value obtained: 81.9333\n", + "Current minimum: 81.9333\n", + "Iteration No: 36 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:07,875\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 36 ended. Search finished for the next optimal point.\n", + "Time taken: 9.8170\n", + "Function value obtained: 75.4507\n", + "Current minimum: 75.4507\n", + "Iteration No: 37 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:16,693\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 37 ended. Search finished for the next optimal point.\n", + "Time taken: 8.8212\n", + "Function value obtained: 63.9293\n", + "Current minimum: 63.9293\n", + "Iteration No: 38 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:25,477\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 38 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0550\n", + "Function value obtained: 80.0707\n", + "Current minimum: 63.9293\n", + "Iteration No: 39 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:33,541\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 39 ended. Search finished for the next optimal point.\n", + "Time taken: 8.7718\n", + "Function value obtained: 94.8528\n", + "Current minimum: 63.9293\n", + "Iteration No: 40 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:42,346\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 40 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0144\n", + "Function value obtained: 247.2551\n", + "Current minimum: 63.9293\n", + "Iteration No: 41 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:50,368\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 41 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0527\n", + "Function value obtained: 138.7606\n", + "Current minimum: 63.9293\n", + "Iteration No: 42 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:40:58,393\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 42 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1034\n", + "Function value obtained: 73.1566\n", + "Current minimum: 63.9293\n", + "Iteration No: 43 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:06,555\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 43 ended. Search finished for the next optimal point.\n", + "Time taken: 8.8306\n", + "Function value obtained: 76.5328\n", + "Current minimum: 63.9293\n", + "Iteration No: 44 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:15,332\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 44 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9906\n", + "Function value obtained: 141.0108\n", + "Current minimum: 63.9293\n", + "Iteration No: 45 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:23,337\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 45 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9446\n", + "Function value obtained: 999.3756\n", + "Current minimum: 63.9293\n", + "Iteration No: 46 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:31,315\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 46 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0997\n", + "Function value obtained: 68.8695\n", + "Current minimum: 63.9293\n", + "Iteration No: 47 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:39,458\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 47 ended. Search finished for the next optimal point.\n", + "Time taken: 8.2354\n", + "Function value obtained: 76.8423\n", + "Current minimum: 63.9293\n", + "Iteration No: 48 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:47,649\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 48 ended. Search finished for the next optimal point.\n", + "Time taken: 8.2502\n", + "Function value obtained: 65.0447\n", + "Current minimum: 63.9293\n", + "Iteration No: 49 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:41:55,953\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 49 ended. Search finished for the next optimal point.\n", + "Time taken: 8.2377\n", + "Function value obtained: 63.9185\n", + "Current minimum: 63.9185\n", + "Iteration No: 50 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:42:05,137\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 50 ended. Search finished for the next optimal point.\n", + "Time taken: 9.0671\n", + "Function value obtained: 61.0286\n", + "Current minimum: 61.0286\n", + "CPU times: user 8min 4s, sys: 9min 38s, total: 17min 42s\n", + "Wall time: 6min 44s\n" + ] + }, + { + "data": { + "text/plain": [ + "(61.02861117378444, [0.2521079381524335, 0.0, 0.0])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "res = gp_minimize(\n", + " g, \n", + " [(0.0, 1.0), (0.0, 1.0), (0.0, 1.0)], \n", + " n_calls = 50, \n", + " verbose=True,\n", + ")\n", + "res.fun, res.x" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "1cc458c3-7e28-483a-9862-6c323586ec0e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 19:41:41,464\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "data": { + "text/plain": [ + "71.11453931779327" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# -> (61.02861117378444, [0.2521079381524335, 0.0, 0.0])\n", + "\n", + "# plot_convergence(res)\n", + "g([0.24, 0.0, 0.0])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "5a06bdf5-0f6c-411a-bac6-bd15e701bbfe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_objective(res)" + ] + }, + { + "cell_type": "markdown", + "id": "d03ebf66-ee2d-45ca-bdd5-6ef0aa79ca45", + "metadata": {}, + "source": [ + "## Test solution" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "7193ccbe-d021-4eb8-a0b8-bd07dce76309", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# CONFIG = {'sigma_M':0, 'sigma_B':0, 'sigma_W':0, }\n", + "CONFIG = {}\n", + "env = carib(config=CONFIG)\n", + "\n", + "null_action = - np.ones(3, dtype=np.float32)\n", + "high_action = 0.5 * np.ones(3, dtype=np.float32)\n", + "all_wolves = np.float32([0.0, 0.5, 0.0])\n", + "all_moose = np.float32([0.5, 0.0, 0.0])\n", + "an_effort = np.float32(res.x)\n", + "# an_effort = np.float32([0.018989021076692904, 0.3169121824404405])\n", + "\n", + "an_action = 2 * an_effort- 1\n", + "\n", + "Ms, Bs, Ws, ts, rews = [], [], [], [], []\n", + "a_ms, a_bs = [], []\n", + "\n", + "obs, _ = env.reset()\n", + "ep_rew=0\n", + "#\n", + "pop = env.population_units()\n", + "Ms.append(pop[0])\n", + "Bs.append(pop[1])\n", + "Ws.append(pop[2])\n", + "ts.append(0)\n", + "a_ms.append(env.parameters[\"a_M\"])\n", + "a_bs.append(env.parameters[\"a_B\"])\n", + "rews.append(ep_rew)\n", + "#\n", + "for t in range(env.Tmax):\n", + " ts.append(t+1)\n", + " obs, rew, term, trunc, info = env.step(an_action)\n", + " pop = env.population_units()\n", + " Ms.append(pop[0])\n", + " Bs.append(pop[1])\n", + " Ws.append(pop[2])\n", + " a_ms.append(env.parameters[\"a_M\"])\n", + " a_bs.append(env.parameters[\"a_B\"])\n", + " ep_rew += rew\n", + " rews.append(ep_rew)\n", + " if term or trunc:\n", + " break\n", + " \n", + "\n", + "ep = pd.DataFrame({\n", + " 't': ts,\n", + " 'm': Ms,\n", + " 'b': Bs,\n", + " 'w': Ws,\n", + " 'rew': rews,\n", + " 'a_M': a_ms,\n", + " 'a_B': a_bs,\n", + "})\n", + "\n", + "ep.plot(x='t', y=['m', 'b', 'w'], title=f'action = {res.x}, rew = {ep_rew}')" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "2e321221-4875-4322-8698-7c4855435117", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 18:44:22,765\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "data": { + "text/plain": [ + 
"(-68.40970542630504, -137.09041561089623, 0.27100475828616766)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# ep.plot(x='t', y=['a_B'], title=f'{ep_rew}')\n", + "\n", + "manager = constAction(res.x)\n", + "gather_stats(manager, carib(config=CONFIG))" + ] + }, + { + "cell_type": "markdown", + "id": "b0cad9ca-3a27-4071-be8c-74768c9feddd", + "metadata": {}, + "source": [ + "## RL solution" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "f997716b-ac61-4985-bfdc-1471f59f835a", + "metadata": {}, + "outputs": [], + "source": [ + "from stable_baselines3 import PPO\n", + "from sb3_contrib import TQC\n", + "\n", + "ppoAgent = PPO.load('../saved_agents/PPO-CaribouScipy', device='cpu')\n", + "tqcAgent = TQC.load('../saved_agents/TQC-CaribouScipy', device='cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "83a7a494-b5d3-4942-bd46-b4ad0e1efab7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# CONFIG = {'sigma_M':0, 'sigma_B':0, 'sigma_W':0, }\n", + "CONFIG = {}\n", + "env = carib(config=CONFIG)\n", + "\n", + "Ms, Bs, Ws, ts, rews = [], [], [], [], []\n", + "a_ms, a_bs = [], []\n", + "mculls, wculls, rests = [], [], []\n", + "\n", + "obs, _ = env.reset()\n", + "ep_rew=0\n", + "#\n", + "pop = env.population_units()\n", + "Ms.append(pop[0])\n", + "Bs.append(pop[1])\n", + "Ws.append(pop[2])\n", + "ts.append(0)\n", + "a_ms.append(env.parameters[\"a_M\"])\n", + "a_bs.append(env.parameters[\"a_B\"])\n", + "rews.append(ep_rew)\n", + "mculls.append(0)\n", + "wculls.append(0)\n", + "rests.append(0)\n", + "#\n", + "for t in range(env.Tmax):\n", + " action, info = ppoAgent.predict(obs)\n", + " ts.append(t+1)\n", + " obs, rew, term, trunc, info = env.step(action)\n", + " pop = env.population_units()\n", + " Ms.append(pop[0])\n", + " Bs.append(pop[1])\n", + " Ws.append(pop[2])\n", + " a_ms.append(env.parameters[\"a_M\"])\n", + " a_bs.append(env.parameters[\"a_B\"])\n", + " mculls.append((action[0] + 1)/2)\n", + " wculls.append((action[1] + 1)/2)\n", + " rests.append((action[2] + 1)/2)\n", + " ep_rew += rew\n", + " rews.append(ep_rew)\n", + " if term or trunc:\n", + " break\n", + " \n", + "\n", + "ep = pd.DataFrame({\n", + " 't': ts,\n", + " 'm': Ms,\n", + " 'b': Bs,\n", + " 'w': Ws,\n", + " 'rew': rews,\n", + " 'a_M': a_ms,\n", + " 'a_B': a_bs,\n", + " 'mcull': mculls,\n", + " 'wcull': wculls,\n", + " 'restoration': rests,\n", + "})\n", + "\n", + "ep.plot(x='t', y=['m', 'b', 'w'], title=f'{ep_rew}')" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "47f2b1e7-8743-4e3a-a6f4-b04273222137", + "metadata": {}, + "outputs": [], + "source": [ + "window = 10\n", + "ep['wcull_moving_av'] = ep['wcull'].rolling(window=window).mean()\n", + "ep['mcull_moving_av'] = ep['mcull'].rolling(window=window).mean()\n", + "ep['rest_moving_av'] = 
ep['restoration'].rolling(window=window).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "a6f8b8a7-b7ef-40fc-b54f-ada9e2667d52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ep.plot(\n", + " x='t', \n", + " y=['wcull_moving_av', 'mcull_moving_av', 'rest_moving_av'], \n", + " title=f'{ep_rew}',\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "e26a31c0-9fed-4618-88a4-a2d5a895c6ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# CONFIG = {'sigma_M':0, 'sigma_B':0, 'sigma_W':0, }\n", + "CONFIG = {}\n", + "env = carib(config=CONFIG)\n", + "\n", + "Ms, Bs, Ws, ts, rews = [], [], [], [], []\n", + "a_ms, a_bs = [], []\n", + "mculls, wculls, rests = [], [], []\n", + "\n", + "obs, _ = env.reset()\n", + "ep_rew=0\n", + "#\n", + "pop = env.population_units()\n", + "Ms.append(pop[0])\n", + "Bs.append(pop[1])\n", + "Ws.append(pop[2])\n", + "ts.append(0)\n", + "a_ms.append(env.parameters[\"a_M\"])\n", + "a_bs.append(env.parameters[\"a_B\"])\n", + "rews.append(ep_rew)\n", + "mculls.append(0)\n", + "wculls.append(0)\n", + "rests.append(0)\n", + "#\n", + "for t in range(env.Tmax):\n", + " action, info = tqcAgent.predict(obs)\n", + " ts.append(t+1)\n", + " obs, rew, term, trunc, info = env.step(action)\n", + " pop = env.population_units()\n", + " Ms.append(pop[0])\n", + " Bs.append(pop[1])\n", + " Ws.append(pop[2])\n", + " a_ms.append(env.parameters[\"a_M\"])\n", + " a_bs.append(env.parameters[\"a_B\"])\n", + " mculls.append((action[0] + 1)/2)\n", + " wculls.append((action[1] + 1)/2)\n", + " rests.append((action[2] + 1)/2)\n", + " ep_rew += rew\n", + " rews.append(ep_rew)\n", + " if term or trunc:\n", + " break\n", + " \n", + "\n", + "ep = pd.DataFrame({\n", + " 't': ts,\n", + " 'm': Ms,\n", + " 'b': Bs,\n", + " 'w': Ws,\n", + " 'rew': rews,\n", + " 'a_M': a_ms,\n", + " 'a_B': a_bs,\n", + " 'mcull': mculls,\n", + " 'wcull': wculls,\n", + " 'restoration': rests,\n", + "})\n", + "\n", + "ep.plot(x='t', y=['m', 'b', 'w'], title=f'{ep_rew}')" + ] + }, + { + "cell_type": "markdown", + "id": "ac568255-90f9-4552-ac44-c2050e21d97c", + "metadata": {}, + "source": [ + "#### Uhhhh... what?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "142c7538-f41a-46ce-b11f-a0c5a4e8f55f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ep.plot(x='t', y=['mcull', 'wcull', 'restoration'], title=f'{ep_rew}')" + ] + }, + { + "cell_type": "markdown", + "id": "d552ce2c-6d6d-4af6-a703-c58af8d61e42", + "metadata": {}, + "source": [ + "## Reward distributions" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "59a0b8ea-81cb-4ad7-8565-0d687fb2290d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-20 20:49:40,491\tINFO worker.py:1749 -- Started a local Ray instance.\n", + "2024-05-20 20:49:49,560\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + } + ], + "source": [ + "caAgentRews = gather_stats(\n", + " manager = constAction(res.x), \n", + " env=carib(), \n", + " N=300, \n", + " return_ep_rewards=True,\n", + ")\n", + "ppoAgentRews = gather_stats(\n", + " manager = ppoAgent, \n", + " env=carib(), \n", + " N=300, \n", + " return_ep_rewards=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "69e748a5-db7c-426f-aab3-afecc4eff77f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ppo: -38.17, c. act.: -59.44\n" + ] + } + ], + "source": [ + "ca_df = pd.DataFrame({\n", + " 'rew': caAgentRews,\n", + " 'strat': 'const_action',\n", + "})\n", + "ppo_df = pd.DataFrame({\n", + " 'rew': ppoAgentRews,\n", + " 'strat': 'ppo',\n", + "})\n", + "rews_df = pd.concat([ca_df, ppo_df])\n", + "print(f\"ppo: {np.mean(ppo_df.rew): .2f}, c. 
act.: {np.mean(ca_df.rew): .2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "17deda7d-6b0d-48db-b7ac-f4758f8d520a", + "metadata": {}, + "outputs": [], + "source": [ + "from plotnine import ggtitle" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "2ba51855-f434-4aa4-9dbd-0e6026fad873", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "" + }, + "metadata": { + "image/png": { + "height": 480, + "width": 640 + } + }, + "output_type": "display_data" + } + ], + "source": [ + "(\n", + " ggplot(rews_df, aes(x='rew', fill='strat')) \n", + " + geom_density(alpha=0.6) \n", + " + ggtitle(f\"ppo mean: {np.mean(ppo_df.rew): .2f}, c. act. mean: {np.mean(ca_df.rew): .2f}\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47dd1494-58b3-4391-b021-32c6b78fcc14", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1880ffd9-e552-4e29-a282-b2651ff07800", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/optimal-fixed-policy.ipynb b/notebooks/optimal-fixed-policy.ipynb index 5b1b806..08e488a 100644 --- a/notebooks/optimal-fixed-policy.ipynb +++ b/notebooks/optimal-fixed-policy.ipynb @@ -10,71 +10,20 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 1, "id": "f15d4b8e-ef57-4bce-899b-89bb32d396f6", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Obtaining 
file:///home/rstudio/rl4fisheries\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Checking if build backend supports build_editable ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build editable ... \u001b[?25ldone\n", - "\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25hRequirement already satisfied: gymnasium in /opt/venv/lib/python3.10/site-packages (from rl4fisheries==1.0.0) (0.28.1)\n", - "Requirement already satisfied: numpy in /opt/venv/lib/python3.10/site-packages (from rl4fisheries==1.0.0) (1.26.4)\n", - "Requirement already satisfied: matplotlib in /opt/venv/lib/python3.10/site-packages (from rl4fisheries==1.0.0) (3.8.2)\n", - "Requirement already satisfied: typing in /opt/venv/lib/python3.10/site-packages (from rl4fisheries==1.0.0) (3.7.4.3)\n", - "Requirement already satisfied: jax-jumpy>=1.0.0 in /opt/venv/lib/python3.10/site-packages (from gymnasium->rl4fisheries==1.0.0) (1.0.0)\n", - "Requirement already satisfied: cloudpickle>=1.2.0 in /opt/venv/lib/python3.10/site-packages (from gymnasium->rl4fisheries==1.0.0) (3.0.0)\n", - "Requirement already satisfied: typing-extensions>=4.3.0 in /opt/venv/lib/python3.10/site-packages (from gymnasium->rl4fisheries==1.0.0) (4.9.0)\n", - "Requirement already satisfied: farama-notifications>=0.0.1 in /opt/venv/lib/python3.10/site-packages (from gymnasium->rl4fisheries==1.0.0) (0.0.4)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (1.2.0)\n", - "Requirement already satisfied: cycler>=0.10 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (4.48.1)\n", - "Requirement already satisfied: 
kiwisolver>=1.3.1 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (1.4.5)\n", - "Requirement already satisfied: packaging>=20.0 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (23.2)\n", - "Requirement already satisfied: pillow>=8 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (10.2.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (3.1.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /opt/venv/lib/python3.10/site-packages (from matplotlib->rl4fisheries==1.0.0) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /opt/venv/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib->rl4fisheries==1.0.0) (1.16.0)\n", - "Building wheels for collected packages: rl4fisheries\n", - " Building editable for rl4fisheries (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25h Created wheel for rl4fisheries: filename=rl4fisheries-1.0.0-0.editable-py3-none-any.whl size=2176 sha256=aebb65ca4f07d99d588c7fb0de18b7cdbb9aff0bb29ab44fe3dd315eb92caf22\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-t5m_i4it/wheels/d3/ce/fe/d5af67bb4edf309f6a59d59140b2b78d5a336b2ad4b93a1fb4\n", - "Successfully built rl4fisheries\n", - "Installing collected packages: rl4fisheries\n", - " Attempting uninstall: rl4fisheries\n", - " Found existing installation: rl4fisheries 1.0.0\n", - " Uninstalling rl4fisheries-1.0.0:\n", - " Successfully uninstalled rl4fisheries-1.0.0\n", - "Successfully installed rl4fisheries-1.0.0\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: scikit-optimize in /opt/venv/lib/python3.10/site-packages (0.9.0)\n", - "Requirement already satisfied: joblib>=0.11 in /opt/venv/lib/python3.10/site-packages (from scikit-optimize) (1.3.2)\n", - "Requirement already satisfied: pyaml>=16.9 in 
/opt/venv/lib/python3.10/site-packages (from scikit-optimize) (23.12.0)\n", - "Requirement already satisfied: numpy>=1.13.3 in /opt/venv/lib/python3.10/site-packages (from scikit-optimize) (1.26.4)\n", - "Requirement already satisfied: scipy>=0.19.1 in /opt/venv/lib/python3.10/site-packages (from scikit-optimize) (1.12.0)\n", - "Requirement already satisfied: scikit-learn>=0.20.0 in /opt/venv/lib/python3.10/site-packages (from scikit-optimize) (1.4.0)\n", - "Requirement already satisfied: PyYAML in /opt/venv/lib/python3.10/site-packages (from pyaml>=16.9->scikit-optimize) (6.0.1)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/venv/lib/python3.10/site-packages (from scikit-learn>=0.20.0->scikit-optimize) (3.2.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install -e ..\n", + "# %pip install -e ..\n", "# %pip install scikit-optimize" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 1, "id": "8a7920aa-5e69-4690-be6d-f03308ddf449", "metadata": {}, "outputs": [], @@ -83,7 +32,10 @@ "from skopt import gp_minimize, gbrt_minimize\n", "import polars as pl\n", "import numpy as np\n", - "from plotnine import ggplot, aes, geom_point, geom_ribbon\n" + "from plotnine import ggplot, aes, geom_point, geom_ribbon\n", + "\n", + "import pandas as pd\n", + "from skopt.plots import plot_objective, plot_convergence\n" ] }, { @@ -96,14 +48,14 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 2, "id": "51c9e6d9-9299-4296-b647-cf498b0b92cb", "metadata": {}, "outputs": [], "source": [ "class fixed_effort:\n", - " def __init__(self, action):\n", - " self.effort = np.array(action, dtype=np.float32)\n", + " def __init__(self, effort):\n", + " self.effort = np.array(effort, dtype=np.float32)\n", "\n", " def predict(self, observation, **kwargs):\n", " action = self.effort * 2 - 1\n", @@ -114,27 +66,70 @@ }, { "cell_type": "code", - 
"execution_count": 37, + "execution_count": 3, "id": "cdeb4bd8-9620-4c4f-829d-a3a2c0e8dfd3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(array([-0.4943695 , -0.48672426, -0.8089408 ], dtype=float32),\n", - " 0.250531405210495,\n", + "(array([-0.88323575, -0.9932028 , -0.81746125], dtype=float32),\n", + " -6.324660778045655e-05,\n", " False,\n", " False,\n", " {})" ] }, - "execution_count": 37, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "env = Caribou()\n", + "am = {\"current\": 1, \"full_rest\": 0.5}\n", + "ab = {\"current\": 5, \"full_rest\": 1}\n", + "\n", + "parameters = {\n", + " \"r_m\": np.float32(0.5),\n", + " \"r_b\": np.float32(0.45),\n", + " #\n", + " \"alpha_mm\": np.float32(0.1),\n", + " \"alpha_bb\": np.float32(0.1),\n", + " \"alpha_bm\": np.float32(0.05),\n", + " \"alpha_mb\": np.float32(0.05),\n", + " #\n", + " \"a_M\": am[\"current\"],\n", + " \"a_B\": ab[\"current\"],\n", + " # \"a_M\": 1,\n", + " # \"a_B\": 2,\n", + " #\n", + " \"K_m\": np.float32(1.1),\n", + " \"K_b\": np.float32(0.40),\n", + " #\n", + " \"h_B\": np.float32(0.031),\n", + " \"h_M\": np.float32(0.31),\n", + " #\n", + " \"x\": np.float32(2),\n", + " \"u\": np.float32(1),\n", + " \"d\": np.float32(0.3),\n", + " #\n", + " \"sigma_M\": np.float32(0.2),\n", + " \"sigma_B\": np.float32(0.25),\n", + " \"sigma_W\": np.float32(0.2),\n", + " # \"sigma_M\": np.float32(0.),\n", + " # \"sigma_B\": np.float32(0.),\n", + " # \"sigma_W\": np.float32(0.),\n", + " \"additive_sigma\": np.float32(0.005),\n", + "}\n", + "\n", + "\n", + "config = {\n", + " 'parameters': parameters,\n", + " 'initial_pop': np.float32([0.572079, 0.025453, 0.911731]),\n", + " # ^ use convergence point for null action and no stochasticity\n", + "}\n", + "\n", + "env = Caribou(config=config)\n", "obs = env.reset()\n", "action, _ = pacifist.predict(obs)\n", "env.step(action)" @@ -154,11 +149,14 @@ }, { "cell_type": "code", - "execution_count": 54, + 
"execution_count": 4, "id": "6fa4681c-fbca-4ade-a8ab-e021cd9d07e5", "metadata": {}, "outputs": [], "source": [ + "import ray\n", + "\n", + "@ray.remote\n", "def gen_ep_rew(manager, env):\n", " episode_reward = 0.0\n", " observation, _ = env.reset()\n", @@ -170,8 +168,12 @@ " break\n", " return episode_reward\n", "\n", - "def gather_stats(manager, env, N=10):\n", - " results = [gen_ep_rew(manager, env) for _ in range(N)]\n", + "def gather_stats(manager, env, N=200):\n", + " results = ray.get(\n", + " [gen_ep_rew.remote(manager, env) for _ in range(N)]\n", + " )\n", + " ray.shutdown()\n", + " #\n", " y = np.mean(results)\n", " sigma = np.std(results)\n", " ymin = y - sigma\n", @@ -181,23 +183,31 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 9, "id": "f3ddd3b8-6696-4f0d-a9a1-c6d722deb3b1", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:25:38,232\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, { "data": { "text/plain": [ - "(23.70720968544483, 5.766838293769911, 41.64758107711975)" + "(-5.567889918454439, -6.924977668806896, -4.210802168101981)" ] }, - "execution_count": 40, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "gen_ep_rew(pacifist, env)\n", "gather_stats(pacifist, env)" ] }, @@ -213,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 5, "id": "d876df99-b2ab-49ab-aaa0-3b545cc7ae4b", "metadata": {}, "outputs": [], @@ -226,8 +236,8 @@ }, { "cell_type": "code", - "execution_count": 50, - "id": "812edc32-f0f9-4ff4-9792-77acf6962179", + "execution_count": 12, + "id": "049ba8a4-b81f-4b31-815b-be7970cba35d", "metadata": { "scrolled": true }, @@ -236,57 +246,1049 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2min 42s, sys: 9min 48s, total: 12min 31s\n", - "Wall time: 1min 40s\n" + "Iteration No: 1 started. 
Evaluating function at random point.\n" ] }, { - "data": { - "text/plain": [ - "(-192.28646437703884, [0.17790704682764627, 0.061024282615602])" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "res = gp_minimize(g, [(0.0, 0.3), (0, 0.3)], n_calls = 300)\n", - "res.fun, res.x" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "4c5c2ec8-f61b-4dae-bc1b-ba70310a694b", - "metadata": {}, - "outputs": [ + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:26:41,368\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3min 41s, sys: 644 ms, total: 3min 42s\n", - "Wall time: 3min 40s\n" + "Iteration No: 1 ended. Evaluation done at random point.\n", + "Time taken: 6.9099\n", + "Function value obtained: 8.3944\n", + "Current minimum: 8.3944\n", + "Iteration No: 2 started. Evaluating function at random point.\n" ] }, { - "data": { - "text/plain": [ - "(-183.45138428616946, [0.1625976286665279, 0.05916838814404951])" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "res = gbrt_minimize(g, [(0.0, 0.3), (0, 0.3)], n_calls = 300)\n", - "res.fun, res.x" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:26:48,140\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 2 ended. Evaluation done at random point.\n", + "Time taken: 6.6931\n", + "Function value obtained: 8.5189\n", + "Current minimum: 8.3944\n", + "Iteration No: 3 started. 
Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:26:54,954\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 3 ended. Evaluation done at random point.\n", + "Time taken: 6.8978\n", + "Function value obtained: 7.8332\n", + "Current minimum: 7.8332\n", + "Iteration No: 4 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:01,789\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 4 ended. Evaluation done at random point.\n", + "Time taken: 6.5554\n", + "Function value obtained: 6.4482\n", + "Current minimum: 6.4482\n", + "Iteration No: 5 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:08,324\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 5 ended. Evaluation done at random point.\n", + "Time taken: 6.7635\n", + "Function value obtained: 7.8851\n", + "Current minimum: 6.4482\n", + "Iteration No: 6 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:15,202\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 6 ended. Evaluation done at random point.\n", + "Time taken: 6.6887\n", + "Function value obtained: 7.4570\n", + "Current minimum: 6.4482\n", + "Iteration No: 7 started. 
Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:21,924\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 7 ended. Evaluation done at random point.\n", + "Time taken: 6.7629\n", + "Function value obtained: 7.4776\n", + "Current minimum: 6.4482\n", + "Iteration No: 8 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:28,673\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 8 ended. Evaluation done at random point.\n", + "Time taken: 6.6721\n", + "Function value obtained: 8.2757\n", + "Current minimum: 6.4482\n", + "Iteration No: 9 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:35,228\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 9 ended. Evaluation done at random point.\n", + "Time taken: 6.4267\n", + "Function value obtained: 3.4761\n", + "Current minimum: 3.4761\n", + "Iteration No: 10 started. Evaluating function at random point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:41,699\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 10 ended. Evaluation done at random point.\n", + "Time taken: 6.8884\n", + "Function value obtained: 7.7584\n", + "Current minimum: 3.4761\n", + "Iteration No: 11 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:48,590\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 11 ended. Search finished for the next optimal point.\n", + "Time taken: 6.9206\n", + "Function value obtained: 7.7113\n", + "Current minimum: 3.4761\n", + "Iteration No: 12 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:27:55,490\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 12 ended. Search finished for the next optimal point.\n", + "Time taken: 7.0718\n", + "Function value obtained: 3.6899\n", + "Current minimum: 3.4761\n", + "Iteration No: 13 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:02,737\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 13 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3528\n", + "Function value obtained: 8.1123\n", + "Current minimum: 3.4761\n", + "Iteration No: 14 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:10,032\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 14 ended. Search finished for the next optimal point.\n", + "Time taken: 7.2734\n", + "Function value obtained: 6.6178\n", + "Current minimum: 3.4761\n", + "Iteration No: 15 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:17,313\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 15 ended. Search finished for the next optimal point.\n", + "Time taken: 6.9263\n", + "Function value obtained: 3.8129\n", + "Current minimum: 3.4761\n", + "Iteration No: 16 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:24,228\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 16 ended. Search finished for the next optimal point.\n", + "Time taken: 7.4502\n", + "Function value obtained: 3.3117\n", + "Current minimum: 3.3117\n", + "Iteration No: 17 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:31,652\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 17 ended. Search finished for the next optimal point.\n", + "Time taken: 7.2443\n", + "Function value obtained: 2.7624\n", + "Current minimum: 2.7624\n", + "Iteration No: 18 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:38,831\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 18 ended. Search finished for the next optimal point.\n", + "Time taken: 7.2384\n", + "Function value obtained: 3.4623\n", + "Current minimum: 2.7624\n", + "Iteration No: 19 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:46,141\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 19 ended. Search finished for the next optimal point.\n", + "Time taken: 7.2537\n", + "Function value obtained: 1.8911\n", + "Current minimum: 1.8911\n", + "Iteration No: 20 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:28:53,438\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 20 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3900\n", + "Function value obtained: 2.7528\n", + "Current minimum: 1.8911\n", + "Iteration No: 21 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:00,823\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 21 ended. Search finished for the next optimal point.\n", + "Time taken: 7.0239\n", + "Function value obtained: 1.7782\n", + "Current minimum: 1.7782\n", + "Iteration No: 22 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:07,728\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 22 ended. Search finished for the next optimal point.\n", + "Time taken: 7.8995\n", + "Function value obtained: 1.9291\n", + "Current minimum: 1.7782\n", + "Iteration No: 23 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:15,649\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 23 ended. Search finished for the next optimal point.\n", + "Time taken: 7.1420\n", + "Function value obtained: 2.1447\n", + "Current minimum: 1.7782\n", + "Iteration No: 24 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:22,806\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 24 ended. Search finished for the next optimal point.\n", + "Time taken: 7.2428\n", + "Function value obtained: 1.7375\n", + "Current minimum: 1.7375\n", + "Iteration No: 25 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:30,149\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 25 ended. Search finished for the next optimal point.\n", + "Time taken: 7.1471\n", + "Function value obtained: 1.9571\n", + "Current minimum: 1.7375\n", + "Iteration No: 26 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:37,189\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 26 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9695\n", + "Function value obtained: 1.7196\n", + "Current minimum: 1.7196\n", + "Iteration No: 27 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:45,309\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 27 ended. Search finished for the next optimal point.\n", + "Time taken: 7.4276\n", + "Function value obtained: 1.9116\n", + "Current minimum: 1.7196\n", + "Iteration No: 28 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:52,623\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 28 ended. Search finished for the next optimal point.\n", + "Time taken: 7.0555\n", + "Function value obtained: 5.8750\n", + "Current minimum: 1.7196\n", + "Iteration No: 29 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:29:59,767\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 29 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3063\n", + "Function value obtained: 1.7385\n", + "Current minimum: 1.7196\n", + "Iteration No: 30 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:06,984\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 30 ended. Search finished for the next optimal point.\n", + "Time taken: 7.0246\n", + "Function value obtained: 1.7629\n", + "Current minimum: 1.7196\n", + "Iteration No: 31 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:14,010\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 31 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0223\n", + "Function value obtained: 1.8089\n", + "Current minimum: 1.7196\n", + "Iteration No: 32 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:22,140\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 32 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3475\n", + "Function value obtained: 1.8630\n", + "Current minimum: 1.7196\n", + "Iteration No: 33 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:29,471\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 33 ended. Search finished for the next optimal point.\n", + "Time taken: 7.1381\n", + "Function value obtained: 2.2714\n", + "Current minimum: 1.7196\n", + "Iteration No: 34 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:36,617\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 34 ended. Search finished for the next optimal point.\n", + "Time taken: 7.0736\n", + "Function value obtained: 5.7942\n", + "Current minimum: 1.7196\n", + "Iteration No: 35 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:43,643\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 35 ended. Search finished for the next optimal point.\n", + "Time taken: 7.1881\n", + "Function value obtained: 4.5248\n", + "Current minimum: 1.7196\n", + "Iteration No: 36 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:50,814\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 36 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3308\n", + "Function value obtained: 1.6878\n", + "Current minimum: 1.6878\n", + "Iteration No: 37 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:30:58,177\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 37 ended. Search finished for the next optimal point.\n", + "Time taken: 7.1597\n", + "Function value obtained: 8.8895\n", + "Current minimum: 1.6878\n", + "Iteration No: 38 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:05,404\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 38 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1257\n", + "Function value obtained: 4.6135\n", + "Current minimum: 1.6878\n", + "Iteration No: 39 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:13,449\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 39 ended. Search finished for the next optimal point.\n", + "Time taken: 7.2257\n", + "Function value obtained: 7.7961\n", + "Current minimum: 1.6878\n", + "Iteration No: 40 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:20,679\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 40 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0782\n", + "Function value obtained: 7.7090\n", + "Current minimum: 1.6878\n", + "Iteration No: 41 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:28,752\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 41 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3119\n", + "Function value obtained: 1.7442\n", + "Current minimum: 1.6878\n", + "Iteration No: 42 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:36,168\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 42 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3256\n", + "Function value obtained: 7.5428\n", + "Current minimum: 1.6878\n", + "Iteration No: 43 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:44,425\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 43 ended. Search finished for the next optimal point.\n", + "Time taken: 8.2707\n", + "Function value obtained: 8.8123\n", + "Current minimum: 1.6878\n", + "Iteration No: 44 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:51,663\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 44 ended. Search finished for the next optimal point.\n", + "Time taken: 8.0775\n", + "Function value obtained: 7.2416\n", + "Current minimum: 1.6878\n", + "Iteration No: 45 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:31:59,751\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 45 ended. Search finished for the next optimal point.\n", + "Time taken: 7.9931\n", + "Function value obtained: 4.1048\n", + "Current minimum: 1.6878\n", + "Iteration No: 46 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:32:07,760\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 46 ended. Search finished for the next optimal point.\n", + "Time taken: 7.3357\n", + "Function value obtained: 10.8838\n", + "Current minimum: 1.6878\n", + "Iteration No: 47 started. 
Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:32:15,206\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 47 ended. Search finished for the next optimal point.\n", + "Time taken: 7.4287\n", + "Function value obtained: 6.6997\n", + "Current minimum: 1.6878\n", + "Iteration No: 48 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:32:22,631\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 48 ended. Search finished for the next optimal point.\n", + "Time taken: 8.4265\n", + "Function value obtained: 5.9701\n", + "Current minimum: 1.6878\n", + "Iteration No: 49 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:32:31,077\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 49 ended. Search finished for the next optimal point.\n", + "Time taken: 8.1849\n", + "Function value obtained: 6.6603\n", + "Current minimum: 1.6878\n", + "Iteration No: 50 started. Searching for the next optimal point.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-16 22:32:39,149\tINFO worker.py:1749 -- Started a local Ray instance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration No: 50 ended. 
Search finished for the next optimal point.\n", + "Time taken: 8.1697\n", + "Function value obtained: 8.1234\n", + "Current minimum: 1.6878\n", + "CPU times: user 7min 21s, sys: 9min 19s, total: 16min 41s\n", + "Wall time: 6min 5s\n" + ] + }, + { + "data": { + "text/plain": [ + "(1.6878340780852437, [0.018989021076692904, 0.3169121824404405])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "res = gp_minimize(g, [(0.0, 1.0), (0.0, 1.0)], n_calls = 50, verbose=True)\n", + "res.fun, res.x" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "61e8fc40-6157-4a96-9fad-74ebc4d3f7b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_objective(res)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c5c2ec8-f61b-4dae-bc1b-ba70310a694b", + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# res = gbrt_minimize(g, [(0.0, 0.3), (0, 0.3)], n_calls = 300)\n", + "# res.fun, res.x" + ] + }, + { + "cell_type": "markdown", + "id": "d2346c45-e588-45ab-b7a8-5677a60ac8e9", + "metadata": {}, + "source": [ + "## Test solution" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "01b2030d-e7df-4243-9203-d602e74aa9a8", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'Caribou' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m carib \u001b[38;5;241m=\u001b[39m \u001b[43mCaribou\u001b[49m(config\u001b[38;5;241m=\u001b[39mconfig)\n\u001b[1;32m 3\u001b[0m null_action \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m np\u001b[38;5;241m.\u001b[39mones(\u001b[38;5;241m2\u001b[39m, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat32)\n\u001b[1;32m 4\u001b[0m high_action \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.5\u001b[39m \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39mones(\u001b[38;5;241m2\u001b[39m, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat32)\n", + "\u001b[0;31mNameError\u001b[0m: name 'Caribou' is not defined" + ] + } + ], + "source": [ + "carib = Caribou(config=config)\n", + "\n", + "null_action = - np.ones(2, dtype=np.float32)\n", + "high_action = 0.5 * np.ones(2, dtype=np.float32)\n", + "all_wolves = np.float32([0.0, 0.5])\n", + "all_moose = np.float32([0.5, 0.0])\n", + "# an_effort = np.float32(res.x)\n", + "an_effort = 
np.float32([0.018989021076692904, 0.3169121824404405])\n", + "\n", + "an_action = 2 * an_effort - 1\n", + "\n", + "Ms, Bs, Ws, ts, rews = [], [], [], [], []\n", + "obs, _ = carib.reset()\n", + "ep_rew=0\n", + "#\n", + "pop = carib.population_units()\n", + "Ms.append(pop[0])\n", + "Bs.append(pop[1])\n", + "Ws.append(pop[2])\n", + "ts.append(0)\n", + "rews.append(ep_rew)\n", + "#\n", + "for t in range(carib.Tmax):\n", + " ts.append(t+1)\n", + " obs, rew, term, trunc, info = carib.step(an_action)\n", + " pop = carib.population_units()\n", + " Ms.append(pop[0])\n", + " Bs.append(pop[1])\n", + " Ws.append(pop[2])\n", + " ep_rew += rew\n", + " rews.append(ep_rew)\n", + " if term or trunc:\n", + " break\n", + " \n", + "\n", + "ep = pd.DataFrame({\n", + " 't': ts,\n", + " 'm': Ms,\n", + " 'b': Bs,\n", + " 'w': Ws,\n", + "})\n", + "\n", + "ep.plot(x='t', title=f'{ep_rew}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81150bac-2bb1-4a3d-bb92-390188ee85ff", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/scripts/train.py b/scripts/train.py index cad37b3..ad49ef4 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -7,17 +7,27 @@ import rl4caribou +## normalizing work directory +# +import os +abs_filepath = os.path.abspath(args.file) + +# change directory to script's directory (since io uses relative paths) +abspath = os.path.abspath(__file__) +dname = os.path.dirname(abspath) +os.chdir(dname) + # training # from rl4caribou.utils import sb3_train -model_save_id, train_options = sb3_train(args.file, progress_bar=args.progress_bar) +model_save_id, train_options = sb3_train(abs_filepath, progress_bar=args.progress_bar) # hugging face # if 'repo' in train_options: from rl4caribou.utils import upload_to_hf try: - upload_to_hf(args.file, "sb3/"+args.file, repo=train_options['repo']) + upload_to_hf(abs_filepath, "sb3/"+args.file, repo=train_options['repo']) upload_to_hf(model_save_id, "sb3/"+model_save_id+".zip", 
repo=train_options['repo']) except: print("Couldn't upload to hf!") \ No newline at end of file diff --git a/src/rl4caribou/agents/const_action.py b/src/rl4caribou/agents/const_action.py index 464af2f..ec09888 100644 --- a/src/rl4caribou/agents/const_action.py +++ b/src/rl4caribou/agents/const_action.py @@ -1,7 +1,7 @@ import numpy as np class constAction: - def __init__(self, mortality_vec=np.zeros(2, dtype=np.float32), env = None, **kwargs): + def __init__(self, mortality_vec=np.zeros(3, dtype=np.float32), env = None, **kwargs): # # preprocess if isinstance(mortality_vec, list): diff --git a/src/rl4caribou/envs/caribou.py b/src/rl4caribou/envs/caribou.py index 99d9be7..92385ec 100644 --- a/src/rl4caribou/envs/caribou.py +++ b/src/rl4caribou/envs/caribou.py @@ -6,46 +6,59 @@ def dynamics(pop, effort, harvest_fn, p, timestep=1): pop = harvest_fn(pop, effort) M, B, W = pop[0], pop[1], pop[2] # moose, caribou, wolf - denominator = (1 + B**p['x'] * p['h_B'] * p['a_B'] + M**p['x'] * p['h_M'] * p['a_M']) + p['a_B(t)'] = p['a_B'] * (1 + min(3, 6 * timestep / 800)) + # print(p['a_B(t)']) - return np.float32([ - M + M * ( - p['r_m'] * (1 - p['alpha_mm'] * M / p['K_m']) - - M**(p['x'] - 1) * W * p['a_M'] / denominator - - p['r_m'] * p['alpha_mb'] * B / p['K_m'] - + p['sigma_M'] * np.random.normal() - ), - # - B + B * ( - p['r_b'] * (1 - p['alpha_bb'] * B / p['K_b']) - - B**(p['x']-1) * W * p['a_B'] / denominator - - p['r_b'] * p['alpha_bm'] * M / p['K_b'] - + p['sigma_B'] * np.random.normal() - ), - # - W + W * ( - B**p['x'] * p['a_B'] / denominator - + M**p['x'] * p['a_M'] * p['u'] / denominator - - p['d'] - + p['sigma_W'] * np.random.normal() - ), - ]) + denominator = (1 + B**p['x'] * p['h_B'] * p['a_B(t)'] + M**p['x'] * p['h_M'] * p['a_M']) + + B_zero = 0 if B==0 else 1 + + zero_B_mask = np.float32([1, B_zero, 1]) + # wolf and moose could randomly move into the habitat from elsewhere + + return np.clip( + zero_B_mask * np.float32([ + M + 0.2 * M * ( + p['r_m'] * (1 - 
p['alpha_mm'] * M / p['K_m']) + - M**(p['x'] - 1) * W * p['a_M'] / denominator + - p['r_m'] * p['alpha_mb'] * B / p['K_m'] + + p['sigma_M'] * np.random.normal() + ) + p['additive_sigma'] * np.random.normal(), + # + B + 0.2 * B * ( + p['r_b'] * (1 - p['alpha_bb'] * B / p['K_b']) + - B**(p['x']-1) * W * p['a_B(t)'] / denominator + - p['r_b'] * p['alpha_bm'] * M / p['K_b'] + + p['sigma_B'] * np.random.normal() + ) + p['additive_sigma'] * np.random.normal(), + # + W + 0.2 *W * ( + B**(p['x']) * p['a_B'] / denominator + + M**(p['x']) * p['a_M'] * p['u'] / denominator + - p['d'] + + p['sigma_W'] * np.random.normal() + ) + p['additive_sigma'] * np.random.normal(), + ]), + a_min = np.float32([0,0,0]), + a_max=None, + ) ## -## Param vals taken from https://doi.org/10.1016/j.ecolmodel.2019.108891 +## Param vals from notebooks/discrete_time.ipynb experiments ## -am = {"current": 15.32, "full_rest": 11.00} -ab = {"current": 51.45, "full_rest": 26.39} + +am = {"current": 1, "full_rest": 0.5} +ab = {"current": 5, "full_rest": 1} parameters = { - "r_m": np.float32(0.39), - "r_b": np.float32(0.30), + "r_m": np.float32(0.5), + "r_b": np.float32(0.45), # - "alpha_mm": np.float32(1), - "alpha_bb": np.float32(1), - "alpha_bm": np.float32(1), - "alpha_mb": np.float32(1), + "alpha_mm": np.float32(0.1), + "alpha_bb": np.float32(0.1), + "alpha_bm": np.float32(0.05), + "alpha_mb": np.float32(0.05), # "a_M": am["current"], "a_B": ab["current"], @@ -53,16 +66,17 @@ def dynamics(pop, effort, harvest_fn, p, timestep=1): "K_m": np.float32(1.1), "K_b": np.float32(0.40), # - "h_M": np.float32(0.112), - "h_B": np.float32(0.112), + "h_B": np.float32(0.031), + "h_M": np.float32(0.31), # "x": np.float32(2), "u": np.float32(1), - "d": np.float32(1), + "d": np.float32(0.3), # - "sigma_M": np.float32(0.1), - "sigma_B": np.float32(0.1), - "sigma_W": np.float32(0.1), + "sigma_M": np.float32(0.2), + "sigma_B": np.float32(0.25), + "sigma_W": np.float32(0.2), + "additive_sigma": np.float32(0.005), } @@
-70,19 +84,32 @@ def dynamics(pop, effort, harvest_fn, p, timestep=1): ## Harvest, utility ## def harvest(pop, effort): - q0 = 0.5 # catchability coefficients -- erradication is impossible - q2 = 0.5 + q0 = 1 # catchability coefficients -- with q = 1, effort = 1 removes the whole population + q2 = 1 pop[0] = pop[0] * (1 - effort[0] * q0) # pop 0, moose pop[2] = pop[2] * (1 - effort[1] * q2) # pop 2, wolves return pop -def utility(pop, effort): - benefits = 0.5 * pop[1] # benefit from Caribou - costs = 0.00001 * (effort[0] + effort[1]) # cost to culling - if np.any(pop <= 0.01): - benefits -= 1 - return benefits - costs +def utility(pop, effort, env): + benefit_vec = [0.2, 0.5, 0.2] + benefits = sum(benefit_vec * pop) # benefit from populations + costs = 0.1 * effort[0] + 0.2 * effort[1] # cost to culling + + thresholds = [env.initial_pop[0], 0.1, env.initial_pop[2]] + for i, pop_i in enumerate(pop): + if pop_i < thresholds[i]: + benefits -= 5 * (thresholds[i] - pop_i) + if (pop_i == 0) and (i==1): + # caribou crash + benefits -= 10 + if (pop_i == 0) and not (i==1): + # other crash + benefits -= 5 + return 0.001 * (benefits - costs) + +def triv_observe(state): + return state class Caribou(gym.Env): """A 3-species ecosystem model with two control actions""" @@ -93,18 +120,21 @@ def __init__(self, config=None): ## these parameters may be specified in config self.Tmax = config.get("Tmax", 800) self.max_episode_steps = self.Tmax - self.threshold = config.get("threshold", np.float32(1e-4)) + self.threshold = config.get("threshold", np.float32(1e-3)) self.init_sigma = config.get("init_sigma", np.float32(1e-3)) self.training = config.get("training", True) - self.initial_pop = config.get("initial_pop", np.ones(3, dtype=np.float32)) + self.initial_pop = config.get( + "initial_pop", + np.float32([0.572079, 0.025453, 0.911731]), + ) self.parameters = config.get("parameters", parameters) self.dynamics = config.get("dynamics", dynamics) self.harvest = config.get("harvest", harvest) self.utility =
config.get("utility", utility) self.observe = config.get( - "observe", lambda state: state + "observe", triv_observe ) # default to perfectly observed case - self.bound = 2 + self.bound = 10 self.action_space = gym.spaces.Box( np.array([-1, -1], dtype=np.float32), @@ -120,12 +150,11 @@ def __init__(self, config=None): def reset(self, *, seed=None, options=None): self.timestep = 0 - self.initial_pop += np.multiply( - self.initial_pop, np.float32(self.init_sigma * np.random.normal(size=3)) - ) + # self.initial_pop = self.initial_pop * (1 + self.init_sigma * np.random.normal(size=3)) self.state = self.state_units(self.initial_pop) info = {} - return self.observe(self.state), info + observation = self.observe(self.state) + return observation, info def step(self, action): action = np.clip(action, self.action_space.low, self.action_space.high) @@ -133,7 +162,7 @@ def step(self, action): effort = (action + 1.0) / 2 # harvest and recruitment - reward = self.utility(pop, effort) + reward = self.utility(pop, effort, self) nextpop = self.dynamics( pop, effort, self.harvest, self.parameters, self.timestep ) @@ -141,10 +170,10 @@ def step(self, action): self.timestep += 1 terminated = bool(self.timestep > self.Tmax) - # in training mode only: punish for population collapse - if any(pop <= self.threshold) and self.training: - terminated = True - reward -= 50 / self.timestep + # # in training mode only: punish for population collapse + # if any(pop <= self.threshold) and self.training: + # terminated = True + # reward -= 100 / self.timestep self.state = self.state_units(nextpop) # transform into [-1, 1] space observation = self.observe(self.state) # same as self.state diff --git a/src/rl4caribou/envs/caribou_ode.py b/src/rl4caribou/envs/caribou_ode.py index b282c82..2cdaa75 100644 --- a/src/rl4caribou/envs/caribou_ode.py +++ b/src/rl4caribou/envs/caribou_ode.py @@ -16,7 +16,9 @@ def dynamics_scipy(pop, effort, p, timestep, singularities): ) * np.random.normal(size=3) ) - return 
odeint(ode_func, y0, t_interval, args=(effort, p), tcrit=singularities)[1] + timestep_randomness * dt + new_pop = odeint(ode_func, y0, t_interval, args=(effort, p), tcrit=singularities)[1] + + return new_pop + timestep_randomness * dt def ode_func(y, t, effort, p): M, B, W = y @@ -114,11 +116,18 @@ def harvest(pop, effort): def utility(pop, effort): benefits = 1 * pop[1] # benefit from Caribou - costs = 0.1 * (effort[0] + effort[1]) + 0.1 * effort[2] # cost to culling + cost of restoring - if np.any(pop <= [0.03, 0.07, 1e-4]): + costs = 0.1 * (effort[0] + effort[1]) + 0.4 * effort[2] # cost to culling + cost of restoring + if np.any(pop <= [0.05, 0.01, 0.001]): benefits -= 1 return benefits - costs +def observe_3pop_restoration(env): + rest_obs = -1 + 2 * ( + (env.current_ab - env.parameters["a_B"]) + / (env.current_ab - env.restored_ab) + ) + return np.float32([*env.state, rest_obs]) + class CaribouScipy(gym.Env): """A 3-species ecosystem model with two control actions""" @@ -131,7 +140,7 @@ def __init__(self, config=None): self.threshold = config.get("threshold", np.float32(1e-4)) self.init_sigma = config.get("init_sigma", np.float32(1e-3)) self.training = config.get("training", True) - self.initial_pop = config.get("initial_pop", np.float32([0.3, 0.15, 0.05])) + self.initial_pop = np.float32(config.get("initial_pop", [0.268, 0.023, 0.079])) # self.current_am = 15.32 self.restored_am = 11.00 @@ -170,7 +179,7 @@ def __init__(self, config=None): self.harvest = config.get("harvest", harvest) self.utility = config.get("utility", utility) self.observe = config.get( - "observe", lambda state: state + "observe", observe_3pop_restoration ) # default to perfectly observed case self.bound = 2 @@ -180,8 +189,8 @@ def __init__(self, config=None): dtype=np.float32, ) self.observation_space = gym.spaces.Box( - np.array([-1, -1, -1], dtype=np.float32), - np.array([1, 1, 1], dtype=np.float32), + np.array([-1, -1, -1, -1], dtype=np.float32), + np.array([1, 1, 1, 1], 
dtype=np.float32), dtype=np.float32, ) self.reset(seed=config.get("seed", None)) @@ -193,7 +202,7 @@ def reset(self, *, seed=None, options=None): ) self.state = self.state_units(self.true_initial_pop) info = {} - return self.observe(self.state), info + return self.observe(self), info def step(self, action): action = np.clip(action, self.action_space.low, self.action_space.high) @@ -216,7 +225,7 @@ def step(self, action): truncated = bool(self.timestep > self.Tmax) # or bool(any(nextpop < 1e-7)) self.state = self.state_units(nextpop) # transform into [-1, 1] space - observation = self.observe(self.state) # same as self.state + observation = self.observe(self) # default observer: self.state plus a restoration component return observation, reward, False, truncated, {} def state_units(self, pop): diff --git a/src/rl4caribou/utils/sb3.py b/src/rl4caribou/utils/sb3.py index 67287f1..e581eb9 100644 --- a/src/rl4caribou/utils/sb3.py +++ b/src/rl4caribou/utils/sb3.py @@ -36,13 +36,58 @@ def algorithm(algo): 'tqc': TQC, } return algos[algo] - def sb3_train(config_file, **kwargs): with open(config_file, "r") as stream: options = yaml.safe_load(stream) options = {**options, **kwargs} # updates / expands on yaml options with optional user-provided input + if 'additional_imports' in options: + import importlib + for module in options['additional_imports']: + print(f"importing {module}") + module = importlib.import_module(module) + globals()[module.__name__] = module + + if "n_envs" in options: + env = make_vec_env( + options["env_id"], options["n_envs"], env_kwargs={"config": options["config"]} + ) + else: + env = gym.make(options["env_id"]) + + if ( + 'policy_kwargs' in options['algo_config'] and + isinstance(options['algo_config']['policy_kwargs'], str) + ): + options['algo_config']['policy_kwargs'] = eval(options['algo_config']['policy_kwargs']) + + ALGO = algorithm(options["algo"]) + if "id" in options: + options["id"] = "-" + options["id"] + model_id = options["algo"] + "-" + options["env_id"] +
options.get("id", "") + save_id = os.path.join(options["save_path"], model_id) + + model = ALGO( + env=env, + **options['algo_config'] + ) + + progress_bar = options.get("progress_bar", False) + model.learn(total_timesteps=options["total_timesteps"], tb_log_name=model_id, progress_bar=progress_bar) + + os.makedirs(options["save_path"], exist_ok=True) + model.save(save_id) + print(f"Saved {options['algo']} model at {save_id}") + + return save_id, options + +def sb3_train_old(config_file, **kwargs): + with open(config_file, "r") as stream: + options = yaml.safe_load(stream) + options = {**options, **kwargs} + # updates / expands on yaml options with optional user-provided input + if "n_envs" in options: env = make_vec_env( options["env_id"], options["n_envs"], env_kwargs={"config": options["config"]}