add option to select taxonomic ranks for performance rankings
fernandomeyer committed May 15, 2019
1 parent d1c0ded commit d93a4d4
Showing 5 changed files with 33 additions and 12 deletions.
index.html: 2 changes (1 addition, 1 deletion)
@@ -6,6 +6,6 @@
 </head>
 <body>
 <p>The page has moved to:
-<a href="https://cami-challenge.github.io/OPAL/cami_i_hc/">OPAL example page</a></p>
+<a href="https://cami-challenge.github.io/OPAL/cami_ii_mg/">OPAL example page</a></p>
 </body>
 </html>
opal.py: 5 changes (3 additions, 2 deletions)
@@ -304,6 +304,7 @@ def main():
     group2.add_argument('-t', '--time', help='Comma-separated runtimes in hours', required=False)
     group2.add_argument('-m', '--memory', help='Comma-separated memory usages in gigabytes', required=False)
     group2.add_argument('-d', '--desc', help='Description for HTML page', required=False)
+    group2.add_argument('-r', '--ranks', help='Highest and lowest taxonomic ranks to consider in performance rankings, comma-separated. Valid ranks: superkingdom, phylum, class, order, family, genus, species, strain (default:superkingdom,species)', required=False)
     group2.add_argument('--silent', help='Silent mode', action='store_true')
     group2.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
     group2.add_argument('-h', '--help', action='help', help='Show this help message and exit')
@@ -357,7 +358,7 @@ def main():
         logger.info('done')
 
     logger.info('Computing rankings...')
-    pd_rankings = rk.highscore_table(pd_metrics)
+    pd_rankings, ranks_scored = rk.highscore_table(pd_metrics, args.ranks)
     logger.info('done')
 
     if time_list or memory_list:
@@ -366,7 +367,7 @@
         logger.info('done')
 
     logger.info('Creating HTML page...')
-    html.create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, args.desc)
+    html.create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, args.desc)
     logger.info('done')
 
     logger.info('OPAL finished successfully. All results have been saved to {}'.format(output_dir))
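The new -r/--ranks option feeds the selected rank range into the rankings and, via ranks_scored, into the HTML report. A hypothetical invocation restricting the rankings to phylum through genus (the gold standard, output, and profile arguments are placeholders following OPAL's existing CLI, not part of this diff):

    python opal.py -g goldstandard.profile -o output_dir -r phylum,genus tool1.profile tool2.profile

If the option is omitted or invalid, the default range superkingdom through species is used, as implemented in src/rankings.py below.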
src/html_opal.py: 11 changes (6 additions, 5 deletions)
@@ -137,7 +137,7 @@ def get_formatted_pd_rankings(pd_rankings):
     return pd_show, pd_show_unsorted_pos
 
 
-def create_rankings_html(pd_rankings):
+def create_rankings_html(pd_rankings, ranks_scored):
     pd_show, pd_show_unsorted_pos = get_formatted_pd_rankings(pd_rankings)
 
     table_source = ColumnDataSource(pd_show)
@@ -199,7 +199,8 @@ def create_rankings_html(pd_rankings):
     p = figure(x_range=pd_show_unsorted_pos[SUM_OF_SCORES].tolist(), plot_width=800, plot_height=400, title=SUM_OF_SCORES + " - lower is better")
     p.vbar(x='x', top='top', source=source, width=0.5, bottom=0, color="firebrick")
 
-    col_rankings = column([Div(text="<font color='navy'><u>Hint 1:</u> click on the columns of scores for sorting.</font>", style={"width": "500px", "margin-bottom": "10px"}),
+    col_rankings = column([Div(text="<font color='navy'><u>Hint 1:</u> click on the columns of scores for sorting.</font>", style={"width": "600px", "margin-bottom": "0px"}),
+                           Div(text="Taxonomic ranks scored: " + ", ".join(ranks_scored), style={"width": "600px", "margin-bottom": "0px"}),
                            data_table,
                            Div(text="<font color='navy'><u>Hint 2:</u> slide the bars to change the weight of the metrics.</font>", style={"width": "500px", "margin-top": "18px"}),
                            row(weight_recall, weight_precision),
@@ -482,16 +483,16 @@ def create_computing_efficiency_tab(pd_metrics, plots_list, tabs_list):
     tabs_list.append(Panel(child=column_time_memory, title="Computing efficiency"))
 
 
-def create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
-    col_rankings = create_rankings_html(pd_rankings)
+def create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
+    col_rankings = create_rankings_html(pd_rankings, ranks_scored)
 
     create_heatmap_bar(output_dir)
 
     select_sample, select_rank, heatmap_legend_div, mytable1 = create_metrics_table(pd_metrics, labels, sample_ids_list)
 
     tabs_plots = create_plots_html(plots_list)
 
-    metrics_row = row(column(select_sample, select_rank, heatmap_legend_div, mytable1, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-height-auto', 'bk-inline-block']), column(tabs_plots, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-inline-block']), css_classes=['bk-width-auto', 'bk-inline-block'], sizing_mode='scale_width')
+    metrics_row = row(column(row(select_sample, select_rank, css_classes=['bk-width-auto', 'bk-combo-box']), heatmap_legend_div, mytable1, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-height-auto', 'bk-inline-block']), column(tabs_plots, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-inline-block']), css_classes=['bk-width-auto', 'bk-inline-block'], sizing_mode='scale_width')
 
     beta_div_column = create_beta_diversity_tab(labels, plots_list)
 
src/rankings.py: 25 changes (22 additions, 3 deletions)
@@ -2,9 +2,23 @@
 
 from src.utils import constants as c
 import pandas as pd
+import logging
 
 
-def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
+def get_user_ranks_list(ranks):
+    rank_high_low = [x.strip() for x in ranks.split(',')]
+    if len(rank_high_low) != 2 or rank_high_low[0] not in c.ALL_RANKS or rank_high_low[1] not in c.ALL_RANKS:
+        logging.getLogger('opal').warning('Invalid ranks provided with option --ranks. Default will be used.')
+        return c.ALL_RANKS[:7]
+    index1 = c.ALL_RANKS.index(rank_high_low[0])
+    index2 = c.ALL_RANKS.index(rank_high_low[1])
+    if index1 < index2:
+        return c.ALL_RANKS[index1:index2 + 1]
+    else:
+        return c.ALL_RANKS[index2:index1 + 1]
+
+
+def highscore_table(metrics, ranks):
     """Compile a ranking table like Figure 3c of CAMI publication.
     Note that Figure 3c took into account mean scores for all samples of one of the three
@@ -18,14 +32,19 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
         Information about metrics of tool performance.
         Must contain columns: metric, rank, tool, value
     useranks : [str]
-        Default: 'phylum', 'class', 'order', 'family', 'genus'
+        Old default (CAMI 1): 'phylum', 'class', 'order', 'family', 'genus'
         Which ranks should be considered for rank dependent metrics.
         Here we decided to exclude e.g. species, because most profilers
         fail at that rank and we don't want to emphasize on this rank.
     Returns
     -------
     Pandas.DataFrame holding a high scoring table as in Figure 3c.
     """
+    if ranks:
+        useranks = get_user_ranks_list(ranks)
+    else:
+        useranks = c.ALL_RANKS[:7]
+
     pd_metrics = metrics.copy()
     pd_metrics.loc[pd_metrics[pd.isnull(pd_metrics['rank'])].index, 'rank'] = 'rank independent'
 
@@ -48,7 +67,7 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
             posresults.append(res)
     posresults = pd.concat(posresults)
 
-    return posresults.groupby(['metric', 'tool'])['position'].sum().to_frame()
+    return posresults.groupby(['metric', 'tool'])['position'].sum().to_frame(), useranks
 
     # reformat like Figure 3c
     os = []
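To illustrate the new rank-range parsing, here is a minimal standalone sketch of get_user_ranks_list. ALL_RANKS is an assumption mirroring c.ALL_RANKS (the eight valid ranks listed in the --ranks help text, highest to lowest); everything else restates the function above without the project imports:

    import logging

    # Assumed to mirror c.ALL_RANKS in src/utils/constants.py (not shown in this diff).
    ALL_RANKS = ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain']

    def get_user_ranks_list(ranks):
        # Parse 'highest,lowest' into the inclusive list of ranks between them.
        rank_high_low = [x.strip() for x in ranks.split(',')]
        if len(rank_high_low) != 2 or rank_high_low[0] not in ALL_RANKS or rank_high_low[1] not in ALL_RANKS:
            logging.getLogger('opal').warning('Invalid ranks provided with option --ranks. Default will be used.')
            return ALL_RANKS[:7]  # default: superkingdom through species
        index1 = ALL_RANKS.index(rank_high_low[0])
        index2 = ALL_RANKS.index(rank_high_low[1])
        # The two ranks may be given in either order; slice from the lower index.
        if index1 < index2:
            return ALL_RANKS[index1:index2 + 1]
        return ALL_RANKS[index2:index1 + 1]

    print(get_user_ranks_list('phylum,genus'))    # ['phylum', 'class', 'order', 'family', 'genus']
    print(get_user_ranks_list('genus, phylum'))   # same list: order-insensitive, whitespace stripped
    print(get_user_ranks_list('phylum,kingdom'))  # logs a warning, returns superkingdom..species

highscore_table then sums each tool's positions over these ranks per metric, and the tuple return carries useranks back to opal.py so the HTML page can display the "Taxonomic ranks scored" line.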
version.py: 2 changes (1 addition, 1 deletion)
@@ -1 +1 @@
-__version__ = '1.0.1'
+__version__ = '1.0.2'
