|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "markdown",
|
5 |
| - "id": "3ceb4719", |
| 5 | + "id": "acc138b0", |
6 | 6 | "metadata": {},
|
7 | 7 | "source": [
|
8 | 8 | "# Basic BVAS demo using simulated data"
|
9 | 9 | ]
|
10 | 10 | },
|
11 | 11 | {
|
12 | 12 | "cell_type": "code",
|
13 |
| - "execution_count": 1, |
14 |
| - "id": "aceebab8", |
| 13 | + "execution_count": 25, |
| 14 | + "id": "105ca8b5", |
15 | 15 | "metadata": {},
|
16 | 16 | "outputs": [],
|
17 | 17 | "source": [
|
18 |
| - "from bvas import simulate_data, BVASSelector" |
| 18 | + "from bvas import simulate_data, BVASSelector\n", |
| 19 | + "from bvas.map import map_inference\n", |
| 20 | + "import pandas as pd\n", |
| 21 | + "import numpy as np" |
19 | 22 | ]
|
20 | 23 | },
|
21 | 24 | {
|
22 | 25 | "cell_type": "markdown",
|
23 |
| - "id": "fef691b9", |
| 26 | + "id": "d4ae7daa", |
24 | 27 | "metadata": {},
|
25 | 28 | "source": [
|
26 | 29 | "### Simulate data"
|
|
29 | 32 | {
|
30 | 33 | "cell_type": "code",
|
31 | 34 | "execution_count": 2,
|
32 |
| - "id": "8f170790", |
| 35 | + "id": "67c8b3f2", |
33 | 36 | "metadata": {},
|
34 | 37 | "outputs": [],
|
35 | 38 | "source": [
|
|
47 | 50 | {
|
48 | 51 | "cell_type": "code",
|
49 | 52 | "execution_count": 3,
|
50 |
| - "id": "f74e7b1b", |
| 53 | + "id": "a8592b05", |
51 | 54 | "metadata": {},
|
52 | 55 | "outputs": [
|
53 | 56 | {
|
|
73 | 76 | },
|
74 | 77 | {
|
75 | 78 | "cell_type": "markdown",
|
76 |
| - "id": "24a7ce7f", |
| 79 | + "id": "929783f8", |
77 | 80 | "metadata": {},
|
78 | 81 | "source": [
|
79 | 82 | "### Instantiate BVASSelector object"
|
|
82 | 85 | {
|
83 | 86 | "cell_type": "code",
|
84 | 87 | "execution_count": 4,
|
85 |
| - "id": "617cb379", |
| 88 | + "id": "4a7a3d81", |
86 | 89 | "metadata": {},
|
87 | 90 | "outputs": [],
|
88 | 91 | "source": [
|
|
99 | 102 | },
|
100 | 103 | {
|
101 | 104 | "cell_type": "markdown",
|
102 |
| - "id": "884fedf0", |
| 105 | + "id": "56b0072c", |
103 | 106 | "metadata": {},
|
104 | 107 | "source": [
|
105 | 108 | "### Run BVAS MCMC-based inference"
|
|
108 | 111 | {
|
109 | 112 | "cell_type": "code",
|
110 | 113 | "execution_count": 5,
|
111 |
| - "id": "77bcb9bd", |
| 114 | + "id": "9c285218", |
112 | 115 | "metadata": {},
|
113 | 116 | "outputs": [
|
114 | 117 | {
|
115 | 118 | "data": {
|
116 | 119 | "application/vnd.jupyter.widget-view+json": {
|
117 |
| - "model_id": "cc591be1cd164f68be7f385dc2537701", |
| 120 | + "model_id": "3ddab09989224a008f51f31c69706a59", |
118 | 121 | "version_major": 2,
|
119 | 122 | "version_minor": 0
|
120 | 123 | },
|
|
132 | 135 | },
|
133 | 136 | {
|
134 | 137 | "cell_type": "markdown",
|
135 |
| - "id": "86691da7", |
| 138 | + "id": "0b3a07cc", |
136 | 139 | "metadata": {},
|
137 | 140 | "source": [
|
138 | 141 | "### Inspect results\n",
|
|
149 | 152 | {
|
150 | 153 | "cell_type": "code",
|
151 | 154 | "execution_count": 6,
|
152 |
| - "id": "f1ae71fd", |
| 155 | + "id": "a13e39fc", |
153 | 156 | "metadata": {},
|
154 | 157 | "outputs": [
|
155 | 158 | {
|
|
182 | 185 | {
|
183 | 186 | "cell_type": "code",
|
184 | 187 | "execution_count": 9,
|
185 |
| - "id": "6d2a48cd", |
| 188 | + "id": "81e46a30", |
186 | 189 | "metadata": {},
|
187 | 190 | "outputs": [
|
188 | 191 | {
|
|
211 | 214 | {
|
212 | 215 | "cell_type": "code",
|
213 | 216 | "execution_count": 10,
|
214 |
| - "id": "e5b9424c", |
| 217 | + "id": "7b64636a", |
215 | 218 | "metadata": {},
|
216 | 219 | "outputs": [],
|
217 | 220 | "source": [
|
218 | 221 | "# the remaining coefficients are all zero\n",
|
219 | 222 | "assert data['true_betas'][10:].min().item() == data['true_betas'][10:].max().item() == 0.0"
|
220 | 223 | ]
|
| 224 | + }, |
| 225 | + { |
| 226 | + "cell_type": "markdown", |
| 227 | + "id": "7149875f", |
| 228 | + "metadata": {}, |
| 229 | + "source": [ |
| 230 | + "### Compare to MAP inference\n", |
| 231 | + "\n", |
| 232 | + "Let's compare to maximum a posteriori (MAP) inference, as in [Inferring effects of mutations on SARS-CoV-2 transmission from genomic surveillance data](https://www.medrxiv.org/content/10.1101/2021.12.31.21268591v2)." |
| 233 | + ] |
| 234 | + }, |
| 235 | + { |
| 236 | + "cell_type": "code", |
| 237 | + "execution_count": 64, |
| 238 | + "id": "975411eb", |
| 239 | + "metadata": {}, |
| 240 | + "outputs": [], |
| 241 | + "source": [ |
| 242 | + "map_results = map_inference(data['Y'], data['Gamma'], taus=[2048.0])\n", |
| 243 | + "inferred_beta = map_results['map_2048.0']['beta']" |
| 244 | + ] |
| 245 | + }, |
| 246 | + { |
| 247 | + "cell_type": "code", |
| 248 | + "execution_count": 65, |
| 249 | + "id": "60cffa06", |
| 250 | + "metadata": {}, |
| 251 | + "outputs": [], |
| 252 | + "source": [ |
| 253 | + "# package results as Pandas DataFrame\n", |
| 254 | + "inferred_beta = pd.DataFrame(inferred_beta, index=mutations, columns=['Beta'])\n", |
| 255 | + "inferred_beta['BetaAbs'] = np.fabs(inferred_beta)\n", |
| 256 | + "inferred_beta = inferred_beta.sort_values(by='BetaAbs', ascending=False)\n", |
| 257 | + "inferred_beta['Rank'] = 1 + np.arange(inferred_beta.shape[0])\n", |
| 258 | + "inferred_beta = inferred_beta[['Beta', 'Rank']]" |
| 259 | + ] |
| 260 | + }, |
| 261 | + { |
| 262 | + "cell_type": "code", |
| 263 | + "execution_count": 67, |
| 264 | + "id": "45a53f68", |
| 265 | + "metadata": {}, |
| 266 | + "outputs": [ |
| 267 | + { |
| 268 | + "data": { |
| 269 | + "text/html": [ |
| 270 | + "<div>\n", |
| 271 | + "<style scoped>\n", |
| 272 | + " .dataframe tbody tr th:only-of-type {\n", |
| 273 | + " vertical-align: middle;\n", |
| 274 | + " }\n", |
| 275 | + "\n", |
| 276 | + " .dataframe tbody tr th {\n", |
| 277 | + " vertical-align: top;\n", |
| 278 | + " }\n", |
| 279 | + "\n", |
| 280 | + " .dataframe thead th {\n", |
| 281 | + " text-align: right;\n", |
| 282 | + " }\n", |
| 283 | + "</style>\n", |
| 284 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 285 | + " <thead>\n", |
| 286 | + " <tr style=\"text-align: right;\">\n", |
| 287 | + " <th></th>\n", |
| 288 | + " <th>Beta</th>\n", |
| 289 | + " <th>Rank</th>\n", |
| 290 | + " </tr>\n", |
| 291 | + " </thead>\n", |
| 292 | + " <tbody>\n", |
| 293 | + " <tr>\n", |
| 294 | + " <th>Causal9</th>\n", |
| 295 | + " <td>-0.053871</td>\n", |
| 296 | + " <td>1</td>\n", |
| 297 | + " </tr>\n", |
| 298 | + " <tr>\n", |
| 299 | + " <th>Causal5</th>\n", |
| 300 | + " <td>0.049838</td>\n", |
| 301 | + " <td>2</td>\n", |
| 302 | + " </tr>\n", |
| 303 | + " <tr>\n", |
| 304 | + " <th>Causal10</th>\n", |
| 305 | + " <td>-0.048263</td>\n", |
| 306 | + " <td>3</td>\n", |
| 307 | + " </tr>\n", |
| 308 | + " <tr>\n", |
| 309 | + " <th>Causal4</th>\n", |
| 310 | + " <td>0.045866</td>\n", |
| 311 | + " <td>4</td>\n", |
| 312 | + " </tr>\n", |
| 313 | + " <tr>\n", |
| 314 | + " <th>Causal3</th>\n", |
| 315 | + " <td>0.027333</td>\n", |
| 316 | + " <td>5</td>\n", |
| 317 | + " </tr>\n", |
| 318 | + " <tr>\n", |
| 319 | + " <th>Causal8</th>\n", |
| 320 | + " <td>-0.021542</td>\n", |
| 321 | + " <td>6</td>\n", |
| 322 | + " </tr>\n", |
| 323 | + " <tr>\n", |
| 324 | + " <th>Spurious80</th>\n", |
| 325 | + " <td>0.020984</td>\n", |
| 326 | + " <td>7</td>\n", |
| 327 | + " </tr>\n", |
| 328 | + " <tr>\n", |
| 329 | + " <th>Spurious44</th>\n", |
| 330 | + " <td>-0.017381</td>\n", |
| 331 | + " <td>8</td>\n", |
| 332 | + " </tr>\n", |
| 333 | + " <tr>\n", |
| 334 | + " <th>Spurious68</th>\n", |
| 335 | + " <td>-0.015019</td>\n", |
| 336 | + " <td>9</td>\n", |
| 337 | + " </tr>\n", |
| 338 | + " <tr>\n", |
| 339 | + " <th>Spurious61</th>\n", |
| 340 | + " <td>0.014249</td>\n", |
| 341 | + " <td>10</td>\n", |
| 342 | + " </tr>\n", |
| 343 | + " <tr>\n", |
| 344 | + " <th>Spurious38</th>\n", |
| 345 | + " <td>0.014112</td>\n", |
| 346 | + " <td>11</td>\n", |
| 347 | + " </tr>\n", |
| 348 | + " <tr>\n", |
| 349 | + " <th>Spurious85</th>\n", |
| 350 | + " <td>0.012077</td>\n", |
| 351 | + " <td>12</td>\n", |
| 352 | + " </tr>\n", |
| 353 | + " <tr>\n", |
| 354 | + " <th>Spurious90</th>\n", |
| 355 | + " <td>0.012060</td>\n", |
| 356 | + " <td>13</td>\n", |
| 357 | + " </tr>\n", |
| 358 | + " <tr>\n", |
| 359 | + " <th>Spurious66</th>\n", |
| 360 | + " <td>0.011890</td>\n", |
| 361 | + " <td>14</td>\n", |
| 362 | + " </tr>\n", |
| 363 | + " <tr>\n", |
| 364 | + " <th>Spurious70</th>\n", |
| 365 | + " <td>0.011479</td>\n", |
| 366 | + " <td>15</td>\n", |
| 367 | + " </tr>\n", |
| 368 | + " </tbody>\n", |
| 369 | + "</table>\n", |
| 370 | + "</div>" |
| 371 | + ], |
| 372 | + "text/plain": [ |
| 373 | + " Beta Rank\n", |
| 374 | + "Causal9 -0.053871 1\n", |
| 375 | + "Causal5 0.049838 2\n", |
| 376 | + "Causal10 -0.048263 3\n", |
| 377 | + "Causal4 0.045866 4\n", |
| 378 | + "Causal3 0.027333 5\n", |
| 379 | + "Causal8 -0.021542 6\n", |
| 380 | + "Spurious80 0.020984 7\n", |
| 381 | + "Spurious44 -0.017381 8\n", |
| 382 | + "Spurious68 -0.015019 9\n", |
| 383 | + "Spurious61 0.014249 10\n", |
| 384 | + "Spurious38 0.014112 11\n", |
| 385 | + "Spurious85 0.012077 12\n", |
| 386 | + "Spurious90 0.012060 13\n", |
| 387 | + "Spurious66 0.011890 14\n", |
| 388 | + "Spurious70 0.011479 15" |
| 389 | + ] |
| 390 | + }, |
| 391 | + "execution_count": 67, |
| 392 | + "metadata": {}, |
| 393 | + "output_type": "execute_result" |
| 394 | + } |
| 395 | + ], |
| 396 | + "source": [ |
| 397 | + "# MAP places 6/10 of the causal alleles at the top\n", |
| 398 | + "inferred_beta.iloc[:15]" |
| 399 | + ] |
221 | 400 | }
|
222 | 401 | ],
|
223 | 402 | "metadata": {
|
|
0 commit comments