@@ -1,38 +1,40 @@
-"""
-Unlike most of the code in this repository, This code requires pyro.
-See https://github.com/pyro-ppl/pyro#installing for installation instructions.
-"""
+import numpy as np
+import pandas as pd
 import pyro
 import pyro.distributions as dist
 import torch
 from torch.linalg import solve_triangular as trisolve
 
 from bvas.util import safe_cholesky
 
 
-def laplace_inference(Y, Gamma,
+def laplace_inference(Y, Gamma, mutations,
                       coef_scale=1.0e-2, seed=0, num_steps=10 ** 4,
                       log_every=500, init_lr=0.01):
     r"""
     Use Maximum A Posteriori (MAP) inference and a diffusion-based likelihood in conjunction
     with a sparsity-inducing Laplace prior on selection coefficients to infer
     selection effects from genomic surveillance data.
 
+    Unlike most of the code in this repository, `laplace_inference` depends on Pyro.
+
     :param torch.Tensor Y: A torch.Tensor of shape (A,) that encodes integrated allele frequency
         increments for each allele, where A is the number of alleles.
     :param torch.Tensor Gamma: A torch.Tensor of shape (A, A) that encodes information about
         second moments of allele frequencies.
+    :param list mutations: A list of strings of length `A` that encodes the names of the `A` alleles in `Y`.
     :param float coef_scale: The regularization scale of the Laplace prior. Defaults to 0.01.
     :param int seed: Random number seed for reproducibility.
     :param int num_steps: The number of optimization steps to do. Defaults to ten thousand.
     :param int log_every: Controls logging frequency. Defaults to 500.
     :param float init_lr: The initial learning rate. Defaults to 0.01.
 
-    :returns dict: Returns a dictionary containing the inferred selection coefficients beta.
+    :returns pandas.DataFrame: Returns a `pd.DataFrame` containing the results of inference.
     """
     pyro.clear_param_store()
 
     A = Gamma.size(-1)
+    assert len(mutations) == A == Gamma.size(-2) == Y.size(0)
 
     L = safe_cholesky(Gamma, num_tries=10)
     L_Y = trisolve(L, Y.unsqueeze(-1), upper=False).squeeze(-1)
@@ -45,7 +47,8 @@ def fit_svi():
         pyro.set_rng_seed(seed)
 
         guide = pyro.infer.autoguide.AutoDelta(model)
-        optim = pyro.optim.ClippedAdam({"lr": init_lr, "lrd": 0.01 ** (1 / num_steps), "betas": (0.5, 0.99)})
+        optim = pyro.optim.ClippedAdam({"lr": init_lr, "lrd": 0.01 ** (1 / num_steps),
+                                        "betas": (0.5, 0.99)})
         svi = pyro.infer.SVI(model, guide, optim, pyro.infer.Trace_ELBO())
 
         for step in range(num_steps):
@@ -56,5 +59,9 @@ def fit_svi():
         return guide
 
     beta = fit_svi().median()['beta'].data.cpu().numpy()
+    beta = pd.DataFrame(beta, index=mutations, columns=['Beta'])
+    beta['BetaAbs'] = np.fabs(beta.Beta.values)
+    beta = beta.sort_values(by='BetaAbs', ascending=False)
+    beta['Rank'] = 1 + np.arange(beta.shape[0])
 
-    return {'beta': beta}
+    return beta[['Beta', 'Rank']]
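
For orientation, a minimal usage sketch of the updated API follows. Everything in it is hypothetical: the random `Y`, the identity `Gamma`, the made-up mutation names, and the top-level import path are placeholders; only the call signature and the returned DataFrame columns come from the diff above.

# Hypothetical usage sketch; inputs are placeholders, not real surveillance data.
import torch

from bvas import laplace_inference  # assumed public import path

A = 3  # number of alleles
mutations = ["S:D614G", "S:N501Y", "ORF1a:T265I"]  # made-up allele names
Y = torch.randn(A)    # integrated allele frequency increments, shape (A,)
Gamma = torch.eye(A)  # second-moment information, shape (A, A)

result = laplace_inference(Y, Gamma, mutations, num_steps=1000)
print(result)  # pd.DataFrame indexed by mutation name, columns ['Beta', 'Rank']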
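
One implementation detail worth noting: the `safe_cholesky`/`trisolve` pair whitens the problem. Assuming the diffusion likelihood is the multivariate Gaussian N(Y | Gamma beta, Gamma) (an interpretation, since the model body is elided from this diff), factoring Gamma = L L^T and solving L @ L_Y = Y converts it into the isotropic Gaussian N(L_Y | L^T beta, I), which is cheap to evaluate at every SVI step. A small numerical check of the underlying identity:

# Check the whitening identity assumed above: with Gamma = L @ L.T and
# L_Y = L^{-1} @ Y, the quadratic form L_Y^T @ L_Y equals Y^T @ Gamma^{-1} @ Y.
import torch
from torch.linalg import solve_triangular

A = 4
M = torch.randn(A, A)
Gamma = M @ M.T + A * torch.eye(A)  # synthetic positive-definite matrix
Y = torch.randn(A)

L = torch.linalg.cholesky(Gamma)
L_Y = solve_triangular(L, Y.unsqueeze(-1), upper=False).squeeze(-1)

lhs = L_Y @ L_Y                          # quadratic form in whitened coordinates
rhs = Y @ torch.linalg.solve(Gamma, Y)   # Y^T Gamma^{-1} Y computed directly
print(torch.allclose(lhs, rhs, atol=1e-4))  # expect: True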