add option to select taxonomic ranks for performance rankings
fernandomeyer committed May 15, 2019
1 parent d1c0ded commit d93a4d4
Showing 5 changed files with 33 additions and 12 deletions.
index.html: 2 changes (1 addition, 1 deletion)
@@ -6,6 +6,6 @@
 </head>
 <body>
 <p>The page has moved to:
-<a href="https://cami-challenge.github.io/OPAL/cami_i_hc/">OPAL example page</a></p>
+<a href="https://cami-challenge.github.io/OPAL/cami_ii_mg/">OPAL example page</a></p>
 </body>
 </html>
opal.py: 5 changes (3 additions, 2 deletions)
@@ -304,6 +304,7 @@ def main():
     group2.add_argument('-t', '--time', help='Comma-separated runtimes in hours', required=False)
     group2.add_argument('-m', '--memory', help='Comma-separated memory usages in gigabytes', required=False)
     group2.add_argument('-d', '--desc', help='Description for HTML page', required=False)
+    group2.add_argument('-r', '--ranks', help='Highest and lowest taxonomic ranks to consider in performance rankings, comma-separated. Valid ranks: superkingdom, phylum, class, order, family, genus, species, strain (default:superkingdom,species)', required=False)
     group2.add_argument('--silent', help='Silent mode', action='store_true')
     group2.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
     group2.add_argument('-h', '--help', action='help', help='Show this help message and exit')
@@ -357,7 +358,7 @@ def main():
         logger.info('done')
 
     logger.info('Computing rankings...')
-    pd_rankings = rk.highscore_table(pd_metrics)
+    pd_rankings, ranks_scored = rk.highscore_table(pd_metrics, args.ranks)
     logger.info('done')
 
     if time_list or memory_list:
@@ -366,7 +367,7 @@
         logger.info('done')
 
     logger.info('Creating HTML page...')
-    html.create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, args.desc)
+    html.create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, args.desc)
     logger.info('done')
 
     logger.info('OPAL finished successfully. All results have been saved to {}'.format(output_dir))
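The new -r/--ranks option feeds the selected rank range into the rankings and, via ranks_scored, into the HTML report. A hypothetical invocation restricting the rankings to phylum through genus (the gold standard, output, and profile arguments are placeholders following OPAL's existing CLI, not part of this diff):

    python opal.py -g goldstandard.profile -o output_dir -r phylum,genus tool1.profile tool2.profile

If the option is omitted or invalid, the default range superkingdom through species is used, as implemented in src/rankings.py below.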
src/html_opal.py: 11 changes (6 additions, 5 deletions)
@@ -137,7 +137,7 @@ def get_formatted_pd_rankings(pd_rankings):
     return pd_show, pd_show_unsorted_pos
 
 
-def create_rankings_html(pd_rankings):
+def create_rankings_html(pd_rankings, ranks_scored):
     pd_show, pd_show_unsorted_pos = get_formatted_pd_rankings(pd_rankings)
 
     table_source = ColumnDataSource(pd_show)
@@ -199,7 +199,8 @@ def create_rankings_html(pd_rankings):
     p = figure(x_range=pd_show_unsorted_pos[SUM_OF_SCORES].tolist(), plot_width=800, plot_height=400, title=SUM_OF_SCORES + " - lower is better")
     p.vbar(x='x', top='top', source=source, width=0.5, bottom=0, color="firebrick")
 
-    col_rankings = column([Div(text="<font color='navy'><u>Hint 1:</u> click on the columns of scores for sorting.</font>", style={"width": "500px", "margin-bottom": "10px"}),
+    col_rankings = column([Div(text="<font color='navy'><u>Hint 1:</u> click on the columns of scores for sorting.</font>", style={"width": "600px", "margin-bottom": "0px"}),
+                           Div(text="Taxonomic ranks scored: " + ", ".join(ranks_scored), style={"width": "600px", "margin-bottom": "0px"}),
                            data_table,
                            Div(text="<font color='navy'><u>Hint 2:</u> slide the bars to change the weight of the metrics.</font>", style={"width": "500px", "margin-top": "18px"}),
                            row(weight_recall, weight_precision),
@@ -482,16 +483,16 @@ def create_computing_efficiency_tab(pd_metrics, plots_list, tabs_list):
     tabs_list.append(Panel(child=column_time_memory, title="Computing efficiency"))
 
 
-def create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
-    col_rankings = create_rankings_html(pd_rankings)
+def create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
+    col_rankings = create_rankings_html(pd_rankings, ranks_scored)
 
     create_heatmap_bar(output_dir)
 
     select_sample, select_rank, heatmap_legend_div, mytable1 = create_metrics_table(pd_metrics, labels, sample_ids_list)
 
     tabs_plots = create_plots_html(plots_list)
 
-    metrics_row = row(column(select_sample, select_rank, heatmap_legend_div, mytable1, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-height-auto', 'bk-inline-block']), column(tabs_plots, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-inline-block']), css_classes=['bk-width-auto', 'bk-inline-block'], sizing_mode='scale_width')
+    metrics_row = row(column(row(select_sample, select_rank, css_classes=['bk-width-auto', 'bk-combo-box']), heatmap_legend_div, mytable1, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-height-auto', 'bk-inline-block']), column(tabs_plots, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-inline-block']), css_classes=['bk-width-auto', 'bk-inline-block'], sizing_mode='scale_width')
 
     beta_div_column = create_beta_diversity_tab(labels, plots_list)
 
src/rankings.py: 25 changes (22 additions, 3 deletions)
@@ -2,9 +2,23 @@
 
 from src.utils import constants as c
 import pandas as pd
+import logging
 
 
-def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
+def get_user_ranks_list(ranks):
+    rank_high_low = [x.strip() for x in ranks.split(',')]
+    if len(rank_high_low) != 2 or rank_high_low[0] not in c.ALL_RANKS or rank_high_low[1] not in c.ALL_RANKS:
+        logging.getLogger('opal').warning('Invalid ranks provided with option --ranks. Default will be used.')
+        return c.ALL_RANKS[:7]
+    index1 = c.ALL_RANKS.index(rank_high_low[0])
+    index2 = c.ALL_RANKS.index(rank_high_low[1])
+    if index1 < index2:
+        return c.ALL_RANKS[index1:index2 + 1]
+    else:
+        return c.ALL_RANKS[index2:index1 + 1]
+
+
+def highscore_table(metrics, ranks):
     """Compile a ranking table like Figure 3c of CAMI publication.
     Note that Figure 3c took into account mean scores for all samples of one of the three
@@ -18,14 +32,19 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
         Information about metrics of tool performance.
         Must contain columns: metric, rank, tool, value
     useranks : [str]
-        Default: 'phylum', 'class', 'order', 'family', 'genus'
+        Old default (CAMI 1): 'phylum', 'class', 'order', 'family', 'genus'
         Which ranks should be considered for rank dependent metrics.
         Here we decided to exclude e.g. species, because most profilers
         fail at that rank and we don't want to emphasize on this rank.
     Returns
     -------
     Pandas.DataFrame holding a high scoring table as in Figure 3c.
     """
+    if ranks:
+        useranks = get_user_ranks_list(ranks)
+    else:
+        useranks = c.ALL_RANKS[:7]
+
     pd_metrics = metrics.copy()
     pd_metrics.loc[pd_metrics[pd.isnull(pd_metrics['rank'])].index, 'rank'] = 'rank independent'
 
@@ -48,7 +67,7 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
             posresults.append(res)
     posresults = pd.concat(posresults)
 
-    return posresults.groupby(['metric', 'tool'])['position'].sum().to_frame()
+    return posresults.groupby(['metric', 'tool'])['position'].sum().to_frame(), useranks
 
     # reformat like Figure 3c
     os = []
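To illustrate the new rank-range parsing, here is a minimal standalone sketch of get_user_ranks_list. ALL_RANKS is an assumption mirroring c.ALL_RANKS (the eight valid ranks listed in the --ranks help text, highest to lowest); everything else restates the function above without the project imports:

    import logging

    # Assumed to mirror c.ALL_RANKS in src/utils/constants.py (not shown in this diff).
    ALL_RANKS = ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain']

    def get_user_ranks_list(ranks):
        # Parse 'highest,lowest' into the inclusive list of ranks between them.
        rank_high_low = [x.strip() for x in ranks.split(',')]
        if len(rank_high_low) != 2 or rank_high_low[0] not in ALL_RANKS or rank_high_low[1] not in ALL_RANKS:
            logging.getLogger('opal').warning('Invalid ranks provided with option --ranks. Default will be used.')
            return ALL_RANKS[:7]  # default: superkingdom through species
        index1 = ALL_RANKS.index(rank_high_low[0])
        index2 = ALL_RANKS.index(rank_high_low[1])
        # The two ranks may be given in either order; slice from the lower index.
        if index1 < index2:
            return ALL_RANKS[index1:index2 + 1]
        return ALL_RANKS[index2:index1 + 1]

    print(get_user_ranks_list('phylum,genus'))    # ['phylum', 'class', 'order', 'family', 'genus']
    print(get_user_ranks_list('genus, phylum'))   # same list: order-insensitive, whitespace stripped
    print(get_user_ranks_list('phylum,kingdom'))  # logs a warning, returns superkingdom..species

highscore_table then sums each tool's positions over these ranks per metric, and the tuple return carries useranks back to opal.py so the HTML page can display the "Taxonomic ranks scored" line.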
version.py: 2 changes (1 addition, 1 deletion)
@@ -1 +1 @@
-__version__ = '1.0.1'
+__version__ = '1.0.2'
