From aeae43fb57a9c155de2919c97ac03bbb2b680117 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Thu, 17 Oct 2019 21:34:55 -0400 Subject: [PATCH] TST: fixing orientation of ranks in heatmaps (#100) * TST: fixing orientation of ranks in heatmaps * version bump * adjusting spacing in heatmap * Adding option to row center conditional probs when visualizing * flake8 --- CHANGELOG.md | 4 ++ examples/cf/check_rhamnolipids.ipynb | 19 ++++++++- examples/soils/check_soils.ipynb | 62 ++++++++++++++-------------- examples/soils/run.sh | 19 ++++----- mmvec/__init__.py | 2 +- mmvec/heatmap.py | 4 +- mmvec/q2/_visualizers.py | 16 ++++++- mmvec/q2/plugin_setup.py | 6 +++ mmvec/q2/tests/test_visualizers.py | 4 +- 9 files changed, 85 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba2b892..5014112 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # mmvec changelog +## Version 1.0.2 (2019-10-18) +# Bug fixes + - Inputs are now expected to be metabolites x microbes in heatmaps [#100](https://github.com/biocore/mmvec/pull/100) + ## Version 1.0.1 (2019-10-17) # Enhancements - Ranks are transposed and viewable in qiime metadata tabulate [#99](https://github.com/biocore/mmvec/pull/99) diff --git a/examples/cf/check_rhamnolipids.ipynb b/examples/cf/check_rhamnolipids.ipynb index 3c763d1..3c932da 100644 --- a/examples/cf/check_rhamnolipids.ipynb +++ b/examples/cf/check_rhamnolipids.ipynb @@ -4,7 +4,22 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m8a937f0c-d349-40e0-acf9-9e221ba5b292\u001b[m\u001b[m microbe-metadata.txt\r\n", + "biplot.qza otus_nt.biom\r\n", + "check_rhamnolipids.ipynb otus_nt.qza\r\n", + "emperor.qzv q2_run.sh\r\n", + "heatmap.qzv ranks.qza\r\n", + "lcms_nt.biom \u001b[34msummary\u001b[m\u001b[m\r\n", + "lcms_nt.qza \u001b[34msummarydir\u001b[m\u001b[m\r\n", + "metabolite-metadata.txt \u001b[34mtesting\u001b[m\u001b[m\r\n" + ] + } + ], "source": [ "!ls" ] @@ -45,7 +60,7 @@ "import pandas as pd\n", "import numpy as np\n", "fname = 'latent_dim_3_input_prior_1.00_output_prior_1.00_beta1_0.90_beta2_0.95_ranks.txt'\n", - "ranks = pd.read_csv(f'testing/{fname}', sep='\\t', index_col=0)\n", + "ranks = pd.read_csv(f'summary/{fname}', sep='\\t', index_col=0)\n", "microbe_metadata = pd.read_csv('microbe-metadata.txt', sep='\\t', index_col=0)\n", "metabolite_metadata = pd.read_csv('metabolite-metadata.txt', sep='\\t', index_col=0)" ] diff --git a/examples/soils/check_soils.ipynb b/examples/soils/check_soils.ipynb index c407914..365ad6f 100644 --- a/examples/soils/check_soils.ipynb +++ b/examples/soils/check_soils.ipynb @@ -57,11 +57,11 @@ "data": { "text/plain": [ "featureid\n", - "(2,3-dihydroxy-3-methylbutanoate) -5.243021\n", - "(2,5-diaminohexanoate) -1.290612\n", - "(3-hydroxypyridine) 0.002373\n", - "(3-methyladenine) 0.971289\n", - "(4-oxoproline) 2.978444\n", + "(2,3-dihydroxy-3-methylbutanoate) -3.987261\n", + "(2,5-diaminohexanoate) -1.352668\n", + "(3-hydroxypyridine) -0.020257\n", + "(3-methyladenine) 0.959734\n", + "(4-oxoproline) 2.986923\n", "Name: rplo 1 (Cyanobacteria), dtype: float64" ] }, @@ -177,19 +177,19 @@ "data": { "text/plain": [ "featureid\n", - "cytosine 3.238725\n", - "xanthine 0.687712\n", - "N-acetylornithine 1.247421\n", - "uracil 1.778591\n", - "adenine 4.983674\n", - "(N6-acetyl-lysine) 4.423469\n", - "4-guanidinobutanoate 4.031901\n", - "guanine 3.107524\n", - "hypoxanthine 0.666798\n", - "7-methyladenine 0.302561\n", - "succinate 0.893106\n", - "(3-methyladenine) 0.971289\n", - "adenosine 4.995941\n", + "xanthine 0.642930\n", + "(N6-acetyl-lysine) 4.409032\n", + "succinate 0.878566\n", + "guanine 3.086299\n", + "adenine 4.947557\n", + "N-acetylornithine 1.247694\n", + "7-methyladenine 0.232607\n", + "cytosine 3.205279\n", + "hypoxanthine 0.661717\n", + "4-guanidinobutanoate 3.998861\n", + "(3-methyladenine) 0.959734\n", + "adenosine 4.981767\n", + "uracil 1.782586\n", "Name: rplo 1 (Cyanobacteria), dtype: float64" ] }, @@ -287,19 +287,19 @@ "data": { "text/plain": [ "featureid\n", - "cytosine 3.147861\n", - "xanthine 0.842640\n", - "N-acetylornithine 1.281711\n", - "uracil 1.990830\n", - "adenine 5.086781\n", - "(N6-acetyl-lysine) 4.530147\n", - "4-guanidinobutanoate 4.027770\n", - "guanine 3.129724\n", - "hypoxanthine 0.713017\n", - "7-methyladenine 0.492983\n", - "succinate 0.805413\n", - "(3-methyladenine) 1.006567\n", - "adenosine 4.987744\n", + "xanthine 0.742247\n", + "(N6-acetyl-lysine) 4.348102\n", + "succinate 0.720436\n", + "guanine 2.874898\n", + "adenine 4.852340\n", + "N-acetylornithine 1.147904\n", + "7-methyladenine 0.340129\n", + "cytosine 3.000772\n", + "hypoxanthine 0.522730\n", + "4-guanidinobutanoate 3.888838\n", + "(3-methyladenine) 0.750547\n", + "adenosine 4.898254\n", + "uracil 1.723071\n", "Name: rplo 1 (Cyanobacteria), dtype: float64" ] }, diff --git a/examples/soils/run.sh b/examples/soils/run.sh index 82c8442..ec6dd1d 100644 --- a/examples/soils/run.sh +++ b/examples/soils/run.sh @@ -7,14 +7,11 @@ mmvec paired-omics\ --learning-rate 1e-3 \ --epochs 3000 -qiime tools import --input-path microbes.biom --output-path microbes.biom.qza --type FeatureTable[Frequency] -qiime tools import --input-path metabolites.biom --output-path metabolites.biom.qza --type FeatureTable[Frequency] - -qiime mmvec paired-omics \ - --i-microbes microbes.biom.qza \ - --i-metabolites metabolites.biom.qza \ - --p-epochs 100 \ - --p-learning-rate 1e-3 \ - --o-conditionals ranks.qza \ - --o-conditional-biplot biplot.qza \ - --verbose +qiime mmvec paired-omics\ + --microbe-file microbes.biom \ + --metabolite-file metabolites.biom \ + --num-testing-examples 1 \ + --min-feature-count 0 \ + --latent-dim 1 \ + --learning-rate 1e-3 \ + --epochs 3000 diff --git a/mmvec/__init__.py b/mmvec/__init__.py index 4f22ff9..9aaad04 100644 --- a/mmvec/__init__.py +++ b/mmvec/__init__.py @@ -1,5 +1,5 @@ from .heatmap import _heatmap_choices, _cmaps -__version__ = "1.0.1" +__version__ = "1.0.2" __all__ = ['_heatmap_choices', '_cmaps'] diff --git a/mmvec/heatmap.py b/mmvec/heatmap.py index 5aeb19c..ff1b64f 100644 --- a/mmvec/heatmap.py +++ b/mmvec/heatmap.py @@ -95,7 +95,7 @@ def ranks_heatmap(ranks, microbe_metadata=None, metabolite_metadata=None, hotmap.ax_row_dendrogram.bar( 0, 0, color=row_class_colors[label], label=label, linewidth=0) hotmap.ax_row_dendrogram.legend( - title=microbe_metadata.name, ncol=1, bbox_to_anchor=(0.1, 0.7), + title=microbe_metadata.name, ncol=1, bbox_to_anchor=(0.2, 0.7), bbox_transform=plt.gcf().transFigure) # toggle axis labels @@ -104,6 +104,7 @@ def ranks_heatmap(ranks, microbe_metadata=None, metabolite_metadata=None, if not y_labels: hotmap.ax_heatmap.set_yticklabels('') + plt.subplots_adjust(left=0.2) return hotmap @@ -229,7 +230,6 @@ def paired_heatmaps(ranks, microbes_table, metabolites_table, microbe_metadata, axes[0].set_xlabel('Microbes') axes[1].set_title('Metabolite abundances') axes[1].set_xlabel('Metabolites') - return select_microbes, select_metabolites, heatmaps diff --git a/mmvec/q2/_visualizers.py b/mmvec/q2/_visualizers.py index f76d1bb..6861768 100644 --- a/mmvec/q2/_visualizers.py +++ b/mmvec/q2/_visualizers.py @@ -20,11 +20,16 @@ def heatmap(output_dir: str, margin_palette: str = 'cubehelix', x_labels: bool = False, y_labels: bool = False, - level: int = -1) -> None: + level: int = -1, + row_center: bool = True) -> None: if microbe_metadata is not None: microbe_metadata = microbe_metadata.to_series() if metabolite_metadata is not None: metabolite_metadata = metabolite_metadata.to_series() + ranks = ranks.T + + if row_center: + ranks = ranks - ranks.mean(axis=0) hotmap = ranks_heatmap(ranks, microbe_metadata, metabolite_metadata, method, metric, color_palette, margin_palette, @@ -51,9 +56,16 @@ def paired_heatmap(output_dir: str, normalize: str = 'log10', color_palette: str = 'magma', top_k_metabolites: int = 50, - level: int = -1) -> None: + level: int = -1, + row_center: bool = True) -> None: if microbe_metadata is not None: microbe_metadata = microbe_metadata.to_series() + + ranks = ranks.T + + if row_center: + ranks = ranks - ranks.mean(axis=0) + select_microbes, select_metabolites, hotmaps = paired_heatmaps( ranks, microbes_table, metabolites_table, microbe_metadata, features, top_k_microbes, top_k_metabolites, keep_top_samples, level, normalize, diff --git a/mmvec/q2/plugin_setup.py b/mmvec/q2/plugin_setup.py index e912c22..cb385c0 100644 --- a/mmvec/q2/plugin_setup.py +++ b/mmvec/q2/plugin_setup.py @@ -100,6 +100,7 @@ 'x_labels': Bool, 'y_labels': Bool, 'level': Int % Range(-1, None), + 'row_center': Bool, }, input_descriptions={'ranks': 'Conditional probabilities.'}, parameter_descriptions={ @@ -116,6 +117,8 @@ 'level': 'taxonomic level for annotating clustermap. Set to -1 if not ' 'parsing semicolon-delimited taxonomies or wish to print ' 'entire annotation.', + 'row_center': 'Center conditional probability table ' + 'around average row.' }, name='Conditional probability heatmap', description="Generate heatmap depicting mmvec conditional probabilities.", @@ -137,6 +140,7 @@ 'top_k_metabolites': Int % Range(1, None) | Str % Choices(['all']), 'keep_top_samples': Bool, 'level': Int % Range(-1, None), + 'row_center': Bool, }, input_descriptions={'ranks': 'Conditional probabilities.', 'microbes_table': 'Microbial feature abundances.', @@ -162,6 +166,8 @@ 'level': 'taxonomic level for annotating clustermap. Set to -1 if not ' 'parsing semicolon-delimited taxonomies or wish to print ' 'entire annotation.', + 'row_center': 'Center conditional probability table ' + 'around average row.' }, name='Paired feature abundance heatmaps', description="Generate paired heatmaps that depict microbial and " diff --git a/mmvec/q2/tests/test_visualizers.py b/mmvec/q2/tests/test_visualizers.py index c211722..6171670 100644 --- a/mmvec/q2/tests/test_visualizers.py +++ b/mmvec/q2/tests/test_visualizers.py @@ -14,7 +14,7 @@ def setUp(self): _ranks = pd.DataFrame([[4.1, 1.3, 2.1], [0.1, 0.3, 0.2], [2.2, 4.3, 3.2], [-6.3, -4.4, 2.1]], index=pd.Index([c for c in 'ABCD'], name='id'), - columns=['m1', 'm2', 'm3']) + columns=['m1', 'm2', 'm3']).T self.ranks = Artifact.import_data('FeatureData[Conditional]', _ranks) self.taxa = CategoricalMetadataColumn(pd.Series([ 'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; ' @@ -53,7 +53,7 @@ def setUp(self): _ranks = pd.DataFrame([[4.1, 1.3, 2.1], [0.1, 0.3, 0.2], [2.2, 4.3, 3.2], [-6.3, -4.4, 2.1]], index=pd.Index([c for c in 'ABCD'], name='id'), - columns=['m1', 'm2', 'm3']) + columns=['m1', 'm2', 'm3']).T self.ranks = Artifact.import_data('FeatureData[Conditional]', _ranks) self.taxa = CategoricalMetadataColumn(pd.Series([ 'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '