update requirements, unifrac tooltip, logging

CAMI-challenge · Apr 11, 2022 · a29a9d5
1 parent fa5c030
commit a29a9d5
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 6 deletions.
diff --git a/opal.py b/opal.py
@@ -8,6 +8,7 @@
 import pandas as pd
 import numpy as np
 import logging
+import shlex
 from src import l1norm as l1
 from src import binary_metrics as bm
 from src import unifrac_distance as uf
@@ -349,6 +350,8 @@ def get_logger(output_dir, silent):
     logging_fh.setFormatter(formatter)
     logger.addHandler(logging_fh)
 
+    logger.info(' '.join(map(shlex.quote, sys.argv)))
+
     if not silent:
         logging_stdout = logging.StreamHandler(sys.stdout)
         logging_stdout.setFormatter(formatter)

diff --git a/requirements/default.txt b/requirements/default.txt
@@ -3,6 +3,7 @@ matplotlib==3.2.1
 dendropy==4.5.2
 pandas==1.3.4
 h5py==3.4.0
+Jinja2==3.0.3
 bokeh==0.13.0
 seaborn==0.10.1
 scikit-bio==0.5.6

diff --git a/src/html_opal.py b/src/html_opal.py
@@ -296,9 +296,9 @@ def create_metrics_table(pd_metrics, labels, sample_ids_list):
     all_sample_ids.insert(0, '(average over samples)')
 
     presence_metrics = [c.RECALL, c.PRECISION, c.F1_SCORE, c.TP, c.FP, c.FN, c.JACCARD]
-    estimates_metrics = [c.SUM_ABUNDANCES, c.UNIFRAC, c.UNW_UNIFRAC, c.UNIFRAC_CAMI, c.UNW_UNIFRAC_CAMI, c.L1NORM, c.BRAY_CURTIS]
+    estimates_metrics = [c.SUM_ABUNDANCES, c.UNIFRAC_CAMI, c.UNW_UNIFRAC_CAMI, c.UNIFRAC, c.UNW_UNIFRAC, c.L1NORM, c.BRAY_CURTIS]
     alpha_diversity_metrics = [c.OTUS, c.SHANNON_DIVERSITY, c.SHANNON_EQUIT]
-    rank_independent_metrics = [c.UNIFRAC, c.UNW_UNIFRAC, c.UNIFRAC_CAMI, c.UNW_UNIFRAC_CAMI]
+    rank_independent_metrics = [c.UNIFRAC_CAMI, c.UNW_UNIFRAC_CAMI, c.UNIFRAC, c.UNW_UNIFRAC]
 
     if c.FP + c.UNFILTERED_SUF in pd_metrics['metric'].values:
         presence_metrics = [[metric, metric + c.UNFILTERED_SUF] for metric in presence_metrics]
@@ -339,8 +339,8 @@ def get_html_dict(metrics):
                       (c.JACCARD, c.TOOLTIP_JACCARD),
                       (c.UNIFRAC, c.TOOLTIP_UNIFRAC),
                       (c.UNW_UNIFRAC, c.TOOLTIP_UNW_UNIFRAC),
-                      (c.UNIFRAC_CAMI, c.TOOLTIP_UNIFRAC),
-                      (c.UNW_UNIFRAC_CAMI, c.TOOLTIP_UNW_UNIFRAC),
+                      (c.UNIFRAC_CAMI, c.TOOLTIP_UNIFRAC_CAMI),
+                      (c.UNW_UNIFRAC_CAMI, c.TOOLTIP_UNW_UNIFRAC_CAMI),
                       (c.L1NORM, c.TOOLTIP_L1NORM),
                       (c.BRAY_CURTIS, c.TOOLTIP_BRAY_CURTIS),
                       (c.OTUS, c.TOOLTIP_OTUS),

diff --git a/src/utils/constants.py b/src/utils/constants.py
@@ -30,8 +30,10 @@
 UNFILTERED_SUF = ' (unfiltered)'
 ALL_METRICS = [UNIFRAC, UNW_UNIFRAC, UNIFRAC_CAMI, UNW_UNIFRAC_CAMI, SUM_ABUNDANCES, L1NORM, PRECISION, RECALL, F1_SCORE, TP, FP, FN, JACCARD, SHANNON_DIVERSITY, SHANNON_EQUIT, BRAY_CURTIS]
 
-TOOLTIP_UNIFRAC = 'tree-based measure of similarity between the true and predicted abundances at all taxonomic ranks ranging from 0 (high similarity) to 16 (low similarity).'
-TOOLTIP_UNW_UNIFRAC = 'similar to the weighted UniFrac error, but measuring how well a profiler correctly identified the presence and absence of taxa instead of their relative abundances. The maximal value is the product of the number of taxonomic ranks minus 1 and the number of true taxa at all taxonomic ranks.'
+TOOLTIP_UNIFRAC = 'weighted UniFrac computed from tree with branch lengths defined by a function.'
+TOOLTIP_UNW_UNIFRAC = 'unweighted UniFrac computed from tree with branch lengths defined by a function.'
+TOOLTIP_UNIFRAC_CAMI = 'tree-based measure of similarity between the true and predicted abundances at all taxonomic ranks ranging from 0 (high similarity) to 16 (low similarity).'
+TOOLTIP_UNW_UNIFRAC_CAMI = 'similar to the weighted UniFrac error, but measuring how well a profiler correctly identified the presence and absence of taxa instead of their relative abundances. The maximal value is the product of the number of taxonomic ranks minus 1 and the number of true taxa at all taxonomic ranks.'
 TOOLTIP_L1NORM = 'sum of the absolute differences between the true and predicted abundances at the selected taxonomic rank ranging from 0 (perfect reconstruction of the relative abundances) to 2 (totally incorrect reconstruction).'
 TOOLTIP_PRECISION = 'ratio of taxa correctly predicted as present and all predicted taxa at the selected taxonomic rank. It ranges from 0 (worst) to 1 (best).'
 TOOLTIP_RECALL = 'ratio of taxa correctly predicted as present and all taxa present at the selected taxonomic rank. It ranges from 0 (worst) to 1 (best).'