TST: fixing orientation of ranks in heatmaps (#100)

* TST: fixing orientation of ranks in heatmaps
* version bump
* adjusting spacing in heatmap
* Adding option to row-center conditional probs when visualizing
* flake8
biocore · Oct 18, 2019 · aeae43f · aeae43f
1 parent 7457c87
commit aeae43f
Show file tree

Hide file tree

Showing 9 changed files with 85 additions and 51 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # mmvec changelog
 
+## Version 1.0.2 (2019-10-18)
+# Bug fixes
+ - Inputs are now expected to be metabolites x microbes in heatmaps [#100](https://github.com/biocore/mmvec/pull/100)
+
 ## Version 1.0.1 (2019-10-17)
 # Enhancements
  - Ranks are transposed and viewable in qiime metadata tabulate [#99](https://github.com/biocore/mmvec/pull/99)

diff --git a/examples/cf/check_rhamnolipids.ipynb b/examples/cf/check_rhamnolipids.ipynb
@@ -4,7 +4,22 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m8a937f0c-d349-40e0-acf9-9e221ba5b292\u001b[m\u001b[m microbe-metadata.txt\r\n",
+      "biplot.qza                           otus_nt.biom\r\n",
+      "check_rhamnolipids.ipynb             otus_nt.qza\r\n",
+      "emperor.qzv                          q2_run.sh\r\n",
+      "heatmap.qzv                          ranks.qza\r\n",
+      "lcms_nt.biom                         \u001b[34msummary\u001b[m\u001b[m\r\n",
+      "lcms_nt.qza                          \u001b[34msummarydir\u001b[m\u001b[m\r\n",
+      "metabolite-metadata.txt              \u001b[34mtesting\u001b[m\u001b[m\r\n"
+     ]
+    }
+   ],
    "source": [
     "!ls"
    ]
@@ -45,7 +60,7 @@
     "import pandas as pd\n",
     "import numpy as np\n",
     "fname = 'latent_dim_3_input_prior_1.00_output_prior_1.00_beta1_0.90_beta2_0.95_ranks.txt'\n",
-    "ranks = pd.read_csv(f'testing/{fname}', sep='\\t', index_col=0)\n",
+    "ranks = pd.read_csv(f'summary/{fname}', sep='\\t', index_col=0)\n",
     "microbe_metadata = pd.read_csv('microbe-metadata.txt', sep='\\t', index_col=0)\n",
     "metabolite_metadata = pd.read_csv('metabolite-metadata.txt', sep='\\t', index_col=0)"
    ]

diff --git a/examples/soils/check_soils.ipynb b/examples/soils/check_soils.ipynb
@@ -57,11 +57,11 @@
      "data": {
       "text/plain": [
        "featureid\n",
-       "(2,3-dihydroxy-3-methylbutanoate)   -5.243021\n",
-       "(2,5-diaminohexanoate)              -1.290612\n",
-       "(3-hydroxypyridine)                  0.002373\n",
-       "(3-methyladenine)                    0.971289\n",
-       "(4-oxoproline)                       2.978444\n",
+       "(2,3-dihydroxy-3-methylbutanoate)   -3.987261\n",
+       "(2,5-diaminohexanoate)              -1.352668\n",
+       "(3-hydroxypyridine)                 -0.020257\n",
+       "(3-methyladenine)                    0.959734\n",
+       "(4-oxoproline)                       2.986923\n",
        "Name: rplo 1 (Cyanobacteria), dtype: float64"
       ]
      },
@@ -177,19 +177,19 @@
      "data": {
       "text/plain": [
        "featureid\n",
-       "cytosine                3.238725\n",
-       "xanthine                0.687712\n",
-       "N-acetylornithine       1.247421\n",
-       "uracil                  1.778591\n",
-       "adenine                 4.983674\n",
-       "(N6-acetyl-lysine)      4.423469\n",
-       "4-guanidinobutanoate    4.031901\n",
-       "guanine                 3.107524\n",
-       "hypoxanthine            0.666798\n",
-       "7-methyladenine         0.302561\n",
-       "succinate               0.893106\n",
-       "(3-methyladenine)       0.971289\n",
-       "adenosine               4.995941\n",
+       "xanthine                0.642930\n",
+       "(N6-acetyl-lysine)      4.409032\n",
+       "succinate               0.878566\n",
+       "guanine                 3.086299\n",
+       "adenine                 4.947557\n",
+       "N-acetylornithine       1.247694\n",
+       "7-methyladenine         0.232607\n",
+       "cytosine                3.205279\n",
+       "hypoxanthine            0.661717\n",
+       "4-guanidinobutanoate    3.998861\n",
+       "(3-methyladenine)       0.959734\n",
+       "adenosine               4.981767\n",
+       "uracil                  1.782586\n",
        "Name: rplo 1 (Cyanobacteria), dtype: float64"
       ]
      },
@@ -287,19 +287,19 @@
      "data": {
       "text/plain": [
        "featureid\n",
-       "cytosine                3.147861\n",
-       "xanthine                0.842640\n",
-       "N-acetylornithine       1.281711\n",
-       "uracil                  1.990830\n",
-       "adenine                 5.086781\n",
-       "(N6-acetyl-lysine)      4.530147\n",
-       "4-guanidinobutanoate    4.027770\n",
-       "guanine                 3.129724\n",
-       "hypoxanthine            0.713017\n",
-       "7-methyladenine         0.492983\n",
-       "succinate               0.805413\n",
-       "(3-methyladenine)       1.006567\n",
-       "adenosine               4.987744\n",
+       "xanthine                0.742247\n",
+       "(N6-acetyl-lysine)      4.348102\n",
+       "succinate               0.720436\n",
+       "guanine                 2.874898\n",
+       "adenine                 4.852340\n",
+       "N-acetylornithine       1.147904\n",
+       "7-methyladenine         0.340129\n",
+       "cytosine                3.000772\n",
+       "hypoxanthine            0.522730\n",
+       "4-guanidinobutanoate    3.888838\n",
+       "(3-methyladenine)       0.750547\n",
+       "adenosine               4.898254\n",
+       "uracil                  1.723071\n",
        "Name: rplo 1 (Cyanobacteria), dtype: float64"
       ]
      },

diff --git a/examples/soils/run.sh b/examples/soils/run.sh
@@ -7,14 +7,11 @@ mmvec paired-omics\
 	 --learning-rate 1e-3 \
 	 --epochs 3000
 
-qiime tools import --input-path microbes.biom --output-path microbes.biom.qza --type FeatureTable[Frequency]
-qiime tools import --input-path metabolites.biom --output-path metabolites.biom.qza --type FeatureTable[Frequency]
-
-qiime mmvec paired-omics \
-      --i-microbes microbes.biom.qza \
-      --i-metabolites metabolites.biom.qza  \
-      --p-epochs 100 \
-      --p-learning-rate 1e-3 \
-      --o-conditionals ranks.qza \
-      --o-conditional-biplot biplot.qza \
-      --verbose
+qiime mmvec paired-omics\
+	 --microbe-file microbes.biom \
+	 --metabolite-file metabolites.biom \
+	 --num-testing-examples 1 \
+	 --min-feature-count 0 \
+	 --latent-dim 1 \
+	 --learning-rate 1e-3 \
+	 --epochs 3000
diff --git a/mmvec/__init__.py b/mmvec/__init__.py
@@ -1,5 +1,5 @@
 from .heatmap import _heatmap_choices, _cmaps
 
-__version__ = "1.0.1"
+__version__ = "1.0.2"
 
 __all__ = ['_heatmap_choices', '_cmaps']
diff --git a/mmvec/heatmap.py b/mmvec/heatmap.py
@@ -95,7 +95,7 @@ def ranks_heatmap(ranks, microbe_metadata=None, metabolite_metadata=None,
             hotmap.ax_row_dendrogram.bar(
                 0, 0, color=row_class_colors[label], label=label, linewidth=0)
         hotmap.ax_row_dendrogram.legend(
-            title=microbe_metadata.name, ncol=1, bbox_to_anchor=(0.1, 0.7),
+            title=microbe_metadata.name, ncol=1, bbox_to_anchor=(0.2, 0.7),
             bbox_transform=plt.gcf().transFigure)
 
     # toggle axis labels
@@ -104,6 +104,7 @@ def ranks_heatmap(ranks, microbe_metadata=None, metabolite_metadata=None,
     if not y_labels:
         hotmap.ax_heatmap.set_yticklabels('')
 
+    plt.subplots_adjust(left=0.2)
     return hotmap
 
 
@@ -229,7 +230,6 @@ def paired_heatmaps(ranks, microbes_table, metabolites_table, microbe_metadata,
     axes[0].set_xlabel('Microbes')
     axes[1].set_title('Metabolite abundances')
     axes[1].set_xlabel('Metabolites')
-
     return select_microbes, select_metabolites, heatmaps
 
 

diff --git a/mmvec/q2/_visualizers.py b/mmvec/q2/_visualizers.py
@@ -20,11 +20,16 @@ def heatmap(output_dir: str,
             margin_palette: str = 'cubehelix',
             x_labels: bool = False,
             y_labels: bool = False,
-            level: int = -1) -> None:
+            level: int = -1,
+            row_center: bool = True) -> None:
     if microbe_metadata is not None:
         microbe_metadata = microbe_metadata.to_series()
     if metabolite_metadata is not None:
         metabolite_metadata = metabolite_metadata.to_series()
+    ranks = ranks.T
+
+    if row_center:
+        ranks = ranks - ranks.mean(axis=0)
 
     hotmap = ranks_heatmap(ranks, microbe_metadata, metabolite_metadata,
                            method, metric, color_palette, margin_palette,
@@ -51,9 +56,16 @@ def paired_heatmap(output_dir: str,
                    normalize: str = 'log10',
                    color_palette: str = 'magma',
                    top_k_metabolites: int = 50,
-                   level: int = -1) -> None:
+                   level: int = -1,
+                   row_center: bool = True) -> None:
     if microbe_metadata is not None:
         microbe_metadata = microbe_metadata.to_series()
+
+    ranks = ranks.T
+
+    if row_center:
+        ranks = ranks - ranks.mean(axis=0)
+
     select_microbes, select_metabolites, hotmaps = paired_heatmaps(
         ranks, microbes_table, metabolites_table, microbe_metadata, features,
         top_k_microbes, top_k_metabolites, keep_top_samples, level, normalize,

diff --git a/mmvec/q2/plugin_setup.py b/mmvec/q2/plugin_setup.py
@@ -100,6 +100,7 @@
         'x_labels': Bool,
         'y_labels': Bool,
         'level': Int % Range(-1, None),
+        'row_center': Bool,
     },
     input_descriptions={'ranks': 'Conditional probabilities.'},
     parameter_descriptions={
@@ -116,6 +117,8 @@
         'level': 'taxonomic level for annotating clustermap. Set to -1 if not '
                  'parsing semicolon-delimited taxonomies or wish to print '
                  'entire annotation.',
+        'row_center': 'Center conditional probability table '
+                      'around average row.'
     },
     name='Conditional probability heatmap',
     description="Generate heatmap depicting mmvec conditional probabilities.",
@@ -137,6 +140,7 @@
         'top_k_metabolites': Int % Range(1, None) | Str % Choices(['all']),
         'keep_top_samples': Bool,
         'level': Int % Range(-1, None),
+        'row_center': Bool,
     },
     input_descriptions={'ranks': 'Conditional probabilities.',
                         'microbes_table': 'Microbial feature abundances.',
@@ -162,6 +166,8 @@
         'level': 'taxonomic level for annotating clustermap. Set to -1 if not '
                  'parsing semicolon-delimited taxonomies or wish to print '
                  'entire annotation.',
+        'row_center': 'Center conditional probability table '
+                      'around average row.'
     },
     name='Paired feature abundance heatmaps',
     description="Generate paired heatmaps that depict microbial and "

diff --git a/mmvec/q2/tests/test_visualizers.py b/mmvec/q2/tests/test_visualizers.py
@@ -14,7 +14,7 @@ def setUp(self):
         _ranks = pd.DataFrame([[4.1, 1.3, 2.1], [0.1, 0.3, 0.2],
                                [2.2, 4.3, 3.2], [-6.3, -4.4, 2.1]],
                               index=pd.Index([c for c in 'ABCD'], name='id'),
-                              columns=['m1', 'm2', 'm3'])
+                              columns=['m1', 'm2', 'm3']).T
         self.ranks = Artifact.import_data('FeatureData[Conditional]', _ranks)
         self.taxa = CategoricalMetadataColumn(pd.Series([
             'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '
@@ -53,7 +53,7 @@ def setUp(self):
         _ranks = pd.DataFrame([[4.1, 1.3, 2.1], [0.1, 0.3, 0.2],
                                [2.2, 4.3, 3.2], [-6.3, -4.4, 2.1]],
                               index=pd.Index([c for c in 'ABCD'], name='id'),
-                              columns=['m1', 'm2', 'm3'])
+                              columns=['m1', 'm2', 'm3']).T
         self.ranks = Artifact.import_data('FeatureData[Conditional]', _ranks)
         self.taxa = CategoricalMetadataColumn(pd.Series([
             'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '