add basic_demo.ipynb

martinjankowiak · martinjankowiak · commit 7fb0009ea6b8 · 2022-04-12T18:39:36.000-04:00
diff --git a/notebooks/basic_demo.ipynb b/notebooks/basic_demo.ipynb
@@ -0,0 +1,223 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6460f518",
+   "metadata": {},
+   "source": [
+    "# Basic BVAS demo using simulated data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8137976b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bvas import simulate_data, BVASSelector"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44e25a88",
+   "metadata": {},
+   "source": [
+    "### Simulate data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "054dd652",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = simulate_data(num_alleles=100, duration=26, num_variants=100, num_regions=10,\n",
+    "                     k=0.1, seed=0, sampling_rate=10, strategy='global-median')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "8362f2f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Y torch.Size([100])\n",
+      "Gamma torch.Size([100, 100])\n",
+      "estimated_nu_eff (1,)\n",
+      "true_betas torch.Size([100])\n",
+      "\n",
+      "Estimated effective population size: 490.3\n"
+     ]
+    }
+   ],
+   "source": [
+    "# inspect simulated data\n",
+    "for k, v in data.items():\n",
+    "    print(k, v.shape)\n",
+    "    \n",
+    "print(\"\\nEstimated effective population size: {:.1f}\".format(data['estimated_nu_eff'].item()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6bee4bdc",
+   "metadata": {},
+   "source": [
+    "### Instantiate BVASSelector object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "bdc84a1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create names for our 100 alleles (the first 10 alleles are non-neutral in the simulation)\n",
+    "mutations = [\"Causal{}\".format(k) for k in range(1, 11)] \n",
+    "mutations += [\"Spurious{}\".format(k) for k in range(11, 101)] \n",
+    "\n",
+    "selector = BVASSelector(data['Y'], \n",
+    "                        data['Gamma'], \n",
+    "                        mutations, \n",
+    "                        S=5.0,\n",
+    "                        tau=100.0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ccca6ebd",
+   "metadata": {},
+   "source": [
+    "### Run BVAS MCMC-based inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "08f1f267",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b7a31fde7b0d46e4ad1a9a6ef8090a5c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1500 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "selector.run(T=1000, T_burnin=500)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0c6ada0e",
+   "metadata": {},
+   "source": [
+    "### Inspect results\n",
+    "\n",
+    "- We find that 8 of the 10 true causal alleles are assigned large PIPs\n",
+    "- We find that 2 of the 10 true causal alleles are missed (i.e. those with small effect sizes, namely Causal1 and Causal6)\n",
+    "- We find that no spurious alleles are assigned large PIPs\n",
+    "- We see that the beta estimates are regularized somewhat towards zero"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "68e34a56",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                 PIP      Beta   BetaStd  Rank\n",
+      "Causal4     0.999999  0.051166  0.006216     1\n",
+      "Causal5     0.999999  0.066024  0.006334     2\n",
+      "Causal10    0.999999 -0.068597  0.008873     3\n",
+      "Causal9     0.999999 -0.065844  0.010435     4\n",
+      "Causal3     0.999903  0.039212  0.006989     5\n",
+      "Causal8     0.881331 -0.026670  0.013342     6\n",
+      "Causal7     0.250416 -0.005248  0.010489     7\n",
+      "Causal2     0.133008  0.003241  0.008441     8\n",
+      "Spurious89  0.023355 -0.000385  0.002696     9\n",
+      "Spurious24  0.018575  0.000173  0.001668    10\n",
+      "Spurious70  0.016120  0.000370  0.002402    11\n",
+      "Spurious80  0.013704  0.000187  0.001622    12\n",
+      "Spurious21  0.011855  0.000109  0.001287    13\n",
+      "Spurious16  0.009838 -0.000098  0.001316    14\n",
+      "Spurious44  0.009189 -0.000078  0.001053    15\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(selector.summary.iloc[:15][['PIP', 'Beta', 'BetaStd', 'Rank']])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "3753108b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Causal1]\t0.01\n",
+      "[Causal2]\t0.02\n",
+      "[Causal3]\t0.04\n",
+      "[Causal4]\t0.06\n",
+      "[Causal5]\t0.08\n",
+      "[Causal6]\t-0.01\n",
+      "[Causal7]\t-0.02\n",
+      "[Causal8]\t-0.04\n",
+      "[Causal9]\t-0.06\n",
+      "[Causal10]\t-0.08\n"
+     ]
+    }
+   ],
+   "source": [
+    "# print true betas for the causal coefficients\n",
+    "for mutation, beta in zip(mutations[:10], data['true_betas'][:10]):\n",
+    "    print(\"[{}]\\t{:.2f}\".format(mutation, beta.item()))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}