@@ -1,38 +1,40 @@
-"""
-Unlike most of the code in this repository, This code requires pyro.
-See https://github.com/pyro-ppl/pyro#installing for installation instructions.
-"""
+import numpy as np
+import pandas as pd
 import pyro
 import pyro.distributions as dist
 import torch
 from torch.linalg import solve_triangular as trisolve
 
 from bvas.util import safe_cholesky
 
 
-def laplace_inference(Y, Gamma,
+def laplace_inference(Y, Gamma, mutations,
                       coef_scale=1.0e-2, seed=0, num_steps=10 ** 4,
                       log_every=500, init_lr=0.01):
     r"""
     Use Maximum A Posteriori (MAP) inference and a diffusion-based likelihood in conjunction
     with a sparsity-inducing Laplace prior on selection coefficients to infer
     selection effects from genomic surveillance data.
 
+    Unlike most of the code in this repository, `laplace_inference` depends on Pyro.
+
     :param torch.Tensor Y: A torch.Tensor of shape (A,) that encodes integrated allele frequency
         increments for each allele, where A is the number of alleles.
     :param torch.Tensor Gamma: A torch.Tensor of shape (A, A) that encodes information about
         second moments of allele frequencies.
+    :param list mutations: A list of strings of length `A` that encodes the names of the `A` alleles in `Y`.
     :param float coef_scale: The regularization scale of the Laplace prior. Defaults to 0.01.
     :param int seed: Random number seed for reproducibility.
     :param int num_steps: The number of optimization steps to do. Defaults to ten thousand.
     :param int log_every: Controls logging frequency. Defaults to 500.
     :param float init_lr: The initial learning rate. Defaults to 0.01.
 
-    :returns dict: Returns a dictionary containing the inferred selection coefficients beta.
+    :returns pandas.DataFrame: Returns a `pd.DataFrame` containing the results of inference.
     """
     pyro.clear_param_store()
 
     A = Gamma.size(-1)
+    assert len(mutations) == A == Gamma.size(-2) == Y.size(0)
 
     L = safe_cholesky(Gamma, num_tries=10)
     L_Y = trisolve(L, Y.unsqueeze(-1), upper=False).squeeze(-1)
@@ -45,7 +47,8 @@ def fit_svi():
         pyro.set_rng_seed(seed)
 
         guide = pyro.infer.autoguide.AutoDelta(model)
-        optim = pyro.optim.ClippedAdam({"lr": init_lr, "lrd": 0.01 ** (1 / num_steps), "betas": (0.5, 0.99)})
+        optim = pyro.optim.ClippedAdam({"lr": init_lr, "lrd": 0.01 ** (1 / num_steps),
+                                        "betas": (0.5, 0.99)})
         svi = pyro.infer.SVI(model, guide, optim, pyro.infer.Trace_ELBO())
 
         for step in range(num_steps):
@@ -56,5 +59,9 @@ def fit_svi():
         return guide
 
     beta = fit_svi().median()['beta'].data.cpu().numpy()
+    beta = pd.DataFrame(beta, index=mutations, columns=['Beta'])
+    beta['BetaAbs'] = np.fabs(beta.Beta.values)
+    beta = beta.sort_values(by='BetaAbs', ascending=False)
+    beta['Rank'] = 1 + np.arange(beta.shape[0])
 
-    return {'beta': beta}
+    return beta[['Beta', 'Rank']]
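
For orientation, a minimal usage sketch of the updated API follows. Everything in it is hypothetical: the random `Y`, the identity `Gamma`, the made-up mutation names, and the top-level import path are placeholders; only the call signature and the returned DataFrame columns come from the diff above.

# Hypothetical usage sketch; inputs are placeholders, not real surveillance data.
import torch

from bvas import laplace_inference  # assumed public import path

A = 3  # number of alleles
mutations = ["S:D614G", "S:N501Y", "ORF1a:T265I"]  # made-up allele names
Y = torch.randn(A)    # integrated allele frequency increments, shape (A,)
Gamma = torch.eye(A)  # second-moment information, shape (A, A)

result = laplace_inference(Y, Gamma, mutations, num_steps=1000)
print(result)  # pd.DataFrame indexed by mutation name, columns ['Beta', 'Rank']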
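
One implementation detail worth noting: the `safe_cholesky`/`trisolve` pair whitens the problem. Assuming the diffusion likelihood is the multivariate Gaussian N(Y | Gamma beta, Gamma) (an interpretation, since the model body is elided from this diff), factoring Gamma = L L^T and solving L @ L_Y = Y converts it into the isotropic Gaussian N(L_Y | L^T beta, I), which is cheap to evaluate at every SVI step. A small numerical check of the underlying identity:

# Check the whitening identity assumed above: with Gamma = L @ L.T and
# L_Y = L^{-1} @ Y, the quadratic form L_Y^T @ L_Y equals Y^T @ Gamma^{-1} @ Y.
import torch
from torch.linalg import solve_triangular

A = 4
M = torch.randn(A, A)
Gamma = M @ M.T + A * torch.eye(A)  # synthetic positive-definite matrix
Y = torch.randn(A)

L = torch.linalg.cholesky(Gamma)
L_Y = solve_triangular(L, Y.unsqueeze(-1), upper=False).squeeze(-1)

lhs = L_Y @ L_Y                          # quadratic form in whitened coordinates
rhs = Y @ torch.linalg.solve(Gamma, Y)   # Y^T Gamma^{-1} Y computed directly
print(torch.allclose(lhs, rhs, atol=1e-4))  # expect: True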