updates

ggsavin · Jan 27, 2024 · 8214f26 · 8214f26
1 parent 77e0bef
commit 8214f26
Show file tree

Hide file tree

Showing 9 changed files with 580 additions and 129 deletions.
diff --git a/approval_training.png b/approval_training.png
diff --git a/compare_training.png b/compare_training.png
diff --git a/notebooks/oracle.ipynb b/notebooks/oracle.ipynb
@@ -0,0 +1,275 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "import sys\n",
+    "sys.path.append('../')\n",
+    "from voting_games.werewolf_env_v0 import pare, Roles, Phase\n",
+    "from notebooks.learning_agents.models import ActorCriticAgent\n",
+    "from notebooks.learning_agents.utils import play_static_game, play_recurrent_game\n",
+    "from notebooks.learning_agents.static_agents import (\n",
+    "    random_approval_villager, \n",
+    "    random_coordinated_approval_villager, \n",
+    "    random_agent,\n",
+    "    random_approval_wolf,\n",
+    "    revenge_approval_wolf,\n",
+    "    coordinated_revenge_approval_wolf,\n",
+    "    random_likes_approval_wolf,\n",
+    "    aggressive_approval_wolf,\n",
+    "    )\n",
+    "import notebooks.learning_agents.stats as indicators\n",
+    "import random\n",
+    "import copy\n",
+    "from matplotlib import pyplot as plt\n",
+    "from tqdm import tqdm\n",
+    "from tabulate import tabulate"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Oracle Approval Voting\n",
+    "\n",
+    "To test the theory that agents have learned to somewhat trust each other based on likes and neutrals, we introduce a single oracle villager that likes every villager and dislikes every werewolf on every turn, except during voting rounds, where it abstains from voting altogether.\n",
+    "\n",
+    "The thought process behind this is that, even though the villagers lose a valuable vote, the oracle's voting pattern will more than compensate for that loss."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build the game environment and read the size of the last axis of a converted observation.\n",
+    "env = pare(num_agents=10, werewolves=2, num_accusations=2)\n",
+    "observations, _, _, _, _ = env.reset()\n",
+    "sample_obs = env.convert_obs(observations['player_0']['observation'])\n",
+    "obs_size = sample_obs.shape[-1]\n",
+    "\n",
+    "# Settings handed to ActorCriticAgent; presumably they must match the stored checkpoint for load_state_dict to succeed.\n",
+    "agent_config = {\n",
+    "    \"rec_hidden_size\": 256,\n",
+    "    \"rec_layers\": 1,\n",
+    "    \"joint_mlp_size\": 128,\n",
+    "    \"split_mlp_size\": 128,\n",
+    "    \"num_votes\": 10,\n",
+    "    \"approval_states\": 3,\n",
+    "}\n",
+    "trained_approval_agent = ActorCriticAgent(agent_config, num_players=10, obs_size=obs_size)\n",
+    "\n",
+    "checkpoint_path = \"../notebooks/stored_agents/lstm_first_no_one_hot_256_128/approval_agent_10_score_53.pth\"\n",
+    "# Kept as the last expression so the cell displays the result of load_state_dict.\n",
+    "trained_approval_agent.load_state_dict(torch.load(checkpoint_path))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Without Oracle wins : 0.58\n"
+     ]
+    }
+   ],
+   "source": [
+    "@torch.no_grad()\n",
+    "def play_recurrent_game_with_oracle(env, wolf_policy, villager_agent, num_times=10, hidden_state_size=None, voting_type=None, static_villager_policy=\"oracle\", vtc_other_villagers=0, p=0.5):\n",
+    "    \"\"\"Play `num_times` games in which one randomly chosen villager is scripted.\n",
+    "\n",
+    "    Every other living villager acts through `villager_agent`; wolves act\n",
+    "    through `wolf_policy`. When `static_villager_policy == 'oracle'` the\n",
+    "    scripted villager is registered with `env.set_oracle` and, on every step,\n",
+    "    submits one value per entry of `env.possible_agents`: -1 with probability\n",
+    "    `p` (else 0) for players listed in `env.world_state['werewolves']`, and\n",
+    "    `vtc_other_villagers` for everyone else. Any other policy value makes the\n",
+    "    scripted villager act through `random_agent`.\n",
+    "\n",
+    "    Args:\n",
+    "        env: game environment, e.g. the one built with `pare`.\n",
+    "        wolf_policy: callable invoked as `wolf_policy(env, wolf, action=previous_action)`.\n",
+    "        villager_agent: model invoked as `villager_agent(obs, (hxs, cxs))` and\n",
+    "            returning `(policies, _, (hxs, cxs))`.\n",
+    "        num_times: number of games to play.\n",
+    "        hidden_state_size: width of the recurrent hidden/cell state; required.\n",
+    "        voting_type: 'plurality' or 'approval'; required.\n",
+    "        static_villager_policy: 'oracle', or any other value for a random villager.\n",
+    "        vtc_other_villagers: value the oracle assigns to every non-werewolf.\n",
+    "        p: probability that the oracle assigns -1 to a given werewolf.\n",
+    "\n",
+    "    Returns:\n",
+    "        Tuple `(wins, game_replays)`: the number of games whose\n",
+    "        `env.world_state['winners']` equals `Roles.VILLAGER`, and a deep copy\n",
+    "        of `env.history` for each game.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if `hidden_state_size` is None or `voting_type` is not\n",
+    "            'plurality' or 'approval'.\n",
+    "    \"\"\"\n",
+    "    # Fail fast: None would otherwise reach torch.zeros and produce an opaque error.\n",
+    "    if hidden_state_size is None:\n",
+    "        raise ValueError(\"hidden_state_size must be set to the recurrent state size of villager_agent\")\n",
+    "    # Fail fast: an unknown voting type would otherwise leave the villagers without any action.\n",
+    "    if voting_type not in (\"plurality\", \"approval\"):\n",
+    "        raise ValueError(f\"voting_type must be 'plurality' or 'approval', got {voting_type!r}\")\n",
+    "\n",
+    "    wins = 0\n",
+    "    game_replays = []\n",
+    "\n",
+    "    for _ in range(num_times):\n",
+    "        next_observations, _, _, _, _ = env.reset()\n",
+    "\n",
+    "        # Pick the scripted villager among the villagers that are in the game.\n",
+    "        static_villager_id = random.choice(list(set(env.agents) & set(env.world_state[\"villagers\"])))\n",
+    "        static_villager = {static_villager_id}\n",
+    "        if static_villager_policy == \"oracle\":\n",
+    "            # Agent ids end in their numeric index (e.g. 'player_3' -> 3).\n",
+    "            env.set_oracle(int(static_villager_id.split(\"_\")[-1]))\n",
+    "\n",
+    "        # Latest (hidden, cell) state per villager, each of shape (1, 1, hidden_state_size).\n",
+    "        recurrent_state = {\n",
+    "            agent: (torch.zeros((1, 1, hidden_state_size), dtype=torch.float32),\n",
+    "                    torch.zeros((1, 1, hidden_state_size), dtype=torch.float32))\n",
+    "            for agent in env.agents if not env.agent_roles[agent]\n",
+    "        }\n",
+    "\n",
+    "        wolf_action = None\n",
+    "\n",
+    "        while env.agents:\n",
+    "            observations = copy.deepcopy(next_observations)\n",
+    "            actions = {}\n",
+    "\n",
+    "            # `-` binds tighter than `&`; the parentheses only make the original evaluation order explicit.\n",
+    "            villagers = set(env.agents) & (set(env.world_state[\"villagers\"]) - static_villager)\n",
+    "            wolves = set(env.agents) & set(env.world_state[\"werewolves\"])\n",
+    "\n",
+    "            ## VILLAGER LOGIC ##\n",
+    "            # torch.cat rejects an empty list, so skip the model when no learning villager is left.\n",
+    "            if villagers:\n",
+    "                v_obs = torch.cat([torch.unsqueeze(torch.tensor(env.convert_obs(observations[villager]['observation']), dtype=torch.float), 0) for villager in villagers])\n",
+    "\n",
+    "                # Stack the per-villager states on dim 0, then swap the first two axes before calling the model.\n",
+    "                hxs, cxs = zip(*[recurrent_state[villager] for villager in villagers])\n",
+    "                hxs = torch.swapaxes(torch.cat(hxs), 0, 1)\n",
+    "                cxs = torch.swapaxes(torch.cat(cxs), 0, 1)\n",
+    "\n",
+    "                policies, _, cells = villager_agent(v_obs, (hxs, cxs))\n",
+    "                v_actions = torch.stack([policy.sample() for policy in policies], dim=1)\n",
+    "\n",
+    "                hxs_new, cxs_new = cells\n",
+    "                hxs_new = torch.swapaxes(hxs_new, 1, 0)\n",
+    "                cxs_new = torch.swapaxes(cxs_new, 1, 0)\n",
+    "\n",
+    "                for i, villager in enumerate(villagers):\n",
+    "                    if voting_type == \"plurality\":\n",
+    "                        actions[villager] = v_actions[i].item()\n",
+    "                    elif voting_type == \"approval\":\n",
+    "                        # Sampled values are shifted down by one before being submitted.\n",
+    "                        actions[villager] = (v_actions[i] - 1).tolist()\n",
+    "                    recurrent_state[villager] = (torch.unsqueeze(hxs_new[i], 0), torch.unsqueeze(cxs_new[i], 0))\n",
+    "\n",
+    "            ## SCRIPTED VILLAGER LOGIC (only while it is still in the game) ##\n",
+    "            if static_villager_id in env.agents:\n",
+    "                if static_villager_policy == \"oracle\":\n",
+    "                    # vtc_other_villagers : what the oracle does towards every non-werewolf.\n",
+    "                    actions[static_villager_id] = [(-1 if random.random() < p else 0) if player in env.world_state['werewolves'] else vtc_other_villagers for player in env.possible_agents]\n",
+    "                else:\n",
+    "                    actions[static_villager_id] = random_agent(env, static_villager_id, action=None)\n",
+    "\n",
+    "            ## WOLF LOGIC ##\n",
+    "            phase = env.world_state['phase']\n",
+    "            for wolf in wolves:\n",
+    "                # Each wolf is handed the previous wolf action so coordinating policies can reuse it.\n",
+    "                wolf_action = wolf_policy(env, wolf, action=wolf_action)\n",
+    "                actions[wolf] = wolf_action\n",
+    "\n",
+    "            next_observations, _, _, _, _ = env.step(actions)\n",
+    "\n",
+    "            ## RESET THE SHARED WOLF ACTION FOR WOLVES THAT COORDINATE ##\n",
+    "            new_phase = env.world_state['phase']\n",
+    "            if new_phase == Phase.NIGHT or (new_phase == Phase.ACCUSATION and phase == Phase.NIGHT):\n",
+    "                wolf_action = None\n",
+    "\n",
+    "        if env.world_state['winners'] == Roles.VILLAGER:\n",
+    "            wins += 1\n",
+    "\n",
+    "        game_replays.append(copy.deepcopy(env.history))\n",
+    "\n",
+    "    return wins, game_replays\n",
+    "\n",
+    "# Baseline run without the scripted oracle villager.\n",
+    "num_times = 1000\n",
+    "wins, replays = play_recurrent_game(\n",
+    "    env,\n",
+    "    random_approval_wolf,\n",
+    "    trained_approval_agent,\n",
+    "    num_times=num_times,\n",
+    "    hidden_state_size=256,\n",
+    "    voting_type=\"approval\",\n",
+    ")\n",
+    "win_rate = wins / float(num_times)\n",
+    "print(f'Without Oracle wins : {win_rate:.2f}')\n",
+    "\n",
+    "def get_w_vote(p):\n",
+    "    \"\"\"Return -1 with probability `p`, otherwise 0.\"\"\"\n",
+    "    if random.random() < p:\n",
+    "        return -1\n",
+    "    return 0\n",
+    "\n",
+    "# Sweep the oracle settings: the value it gives other villagers, crossed with the\n",
+    "# probability that it marks each werewolf with -1.\n",
+    "num_times = 1000\n",
+    "vill_vote = [1, 0]\n",
+    "p = [0.0, 0.25, 0.5, 0.75, 1.0]\n",
+    "oracle_res = []\n",
+    "\n",
+    "# Arguments shared by every run of the sweep.\n",
+    "sweep_kwargs = dict(\n",
+    "    num_times=num_times,\n",
+    "    hidden_state_size=256,\n",
+    "    voting_type=\"approval\",\n",
+    "    static_villager_policy=\"oracle\",\n",
+    ")\n",
+    "\n",
+    "for how_v in vill_vote:\n",
+    "    for percent in p:\n",
+    "        wins, replays = play_recurrent_game_with_oracle(\n",
+    "            env,\n",
+    "            random_approval_wolf,\n",
+    "            trained_approval_agent,\n",
+    "            vtc_other_villagers=how_v,\n",
+    "            p=percent,\n",
+    "            **sweep_kwargs,\n",
+    "        )\n",
+    "        win_rate = wins / float(num_times)\n",
+    "        print(f'Oracle wins with {how_v} for villagers and a chance of voting for werewolves at {percent}: {win_rate:.2f}')\n",
+    "        # Results are stored as strings already rounded to two decimals.\n",
+    "        oracle_res.append(f'{win_rate:.2f}')\n",
+    "    print(\"\\n\")\n",
+    "#wins, replays = play_recurrent_game_with_oracle(env, random_approval_wolf, trained_approval_agent, num_times=num_times, hidden_state_size=256, voting_type=\"approval\", static_villager_policy=\"oracle\")\n",
+    "# print(f'With Oracle wins : {wins/float(num_times):.2f}')\n",
+    "\n",
+    "# num_times = 1000\n",
+    "# wins, replays = play_recurrent_game_with_oracle(env, random_approval_wolf, trained_approval_agent, num_times=num_times, hidden_state_size=256, voting_type=\"approval\", static_villager_policy=\"random\")\n",
+    "# print(f'With other random villager wins : {wins/float(num_times):.2f}')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f\"{10.2222:.2f}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'int' object is not callable",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m/workspaces/voting-games/notebooks/oracle.ipynb Cell 6\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell://dev-container%2B7b22686f737450617468223a225c5c5c5c77736c2e6c6f63616c686f73745c5c5562756e74755c5c686f6d655c5c67736176696e5c5c6361726c65746f6e5c5c766f74696e672d67616d6573222c22636f6e66696746696c65223a7b22246d6964223a312c2270617468223a222f686f6d652f67736176696e2f6361726c65746f6e2f766f74696e672d67616d65732f2e646576636f6e7461696e65722f646576636f6e7461696e65722e6a736f6e222c22736368656d65223a227673636f64652d66696c65486f7374227d7d/workspaces/voting-games/notebooks/oracle.ipynb#W5sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m a()\n",
+      "\u001b[0;31mTypeError\u001b[0m: 'int' object is not callable"
+     ]
+    }
+   ],
+   "source": [
+    "# NOTE(review): leftover scratch cell. `a` is not assigned in any visible cell of this notebook, so on a\n",
+    "# fresh kernel this raises NameError; the recorded TypeError shows `a` was an int left over in the kernel.\n",
+    "a()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}