Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pangolin4 update #224

Merged
merged 11 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 40 additions & 40 deletions bin/summary_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class SummaryReport():
scorpio_constellations_version = None
variants_table = None
pangolin_version = None
pangolearn_version = None
pangolindata_version = None
nextclade_version = None
nextcladedata_version = None
tabledata = None
Expand Down Expand Up @@ -110,20 +110,17 @@ def add_poreCov_version_param(self, porecov_version):


def add_pangolin_version_param(self):
    """Add the pangolin-data version to the report parameters.

    Reads ``self.pangolindata_version`` and passes it, together with a
    linked label, to ``self.add_param()``. When the version is at or below
    1.2.132 a red warning recommending ``--update`` is appended to the value.

    Calls ``error()`` if the version has not been set yet.
    """
    if self.pangolindata_version is None:
        error('add_pangolin_version_param() called before pangolin version was set')

    pl_param = '<a href="https://cov-lineages.org/resources/pangolin/requirements.html"><b>pangolin-data</b></a> version'
    pl_val = f'{self.pangolindata_version}'

    # Newest release that still triggers the "rather old" warning.
    newest_outdated = (1, 2, 132)

    # pangolin-data releases are not always three-component (e.g. '1.3'), so
    # parse however many numeric components are present instead of unpacking
    # exactly three values.
    try:
        parsed = tuple(int(part) for part in str(self.pangolindata_version).split('.'))
    except ValueError:
        # Non-numeric version string: still report it, just skip the age check.
        parsed = None

    if parsed is not None:
        # Pad short versions with zeros so '1.3' is compared as (1, 3, 0).
        parsed += (0,) * (len(newest_outdated) - len(parsed))
        # Ordered tuple comparison; comparing each component independently
        # would miss old releases such as 1.1.200.
        if parsed <= newest_outdated:
            pl_val += f' - <font color="{self.color_error_red}"><b>Warning</b>: A rather old version of pangolin-data was used ({self.pangolindata_version}). Use parameter \'--update\' to force the use of the most recent Pangolin container!</font>'

    self.add_param(pl_param, pl_val)


Expand Down Expand Up @@ -162,22 +159,6 @@ def parse_version_config(self, version_config_file):
log('Parsed version config file.')


def parse_scorpio_versions(self, scorpio_version, sc_constell_version):
# e.g. 'scorpio 0.3.14'
name, vers = scorpio_version.split(' ')
assert name == 'scorpio'
self.scorpio_version = vers.lstrip('v')

# e.g. 'constellations v0.0.24'
name, vers = sc_constell_version.split(' ')
assert name == 'constellations'
self.scorpio_constellations_version = vers.lstrip('v')


def parse_pangolin_version(self, pangolin_docker):
# e.g. nanozoo/pangolin:2.3.8--2021-04-21
self.pangolin_version, self.pangolearn_version = pangolin_docker.split(':',1)[-1].split('--')

def parse_nextclade_version(self, nextclade_docker):
# e.g. nanozoo/nextclade:1.3.0--2021-06-25
self.nextclade_version, self.nextcladedata_version = nextclade_docker.split(':',1)[-1].split('--')
Expand Down Expand Up @@ -377,10 +358,20 @@ def add_pangolin_results(self, pangolin_results):
self.force_index_dtype_string(res_data)
self.check_and_init_tabledata(res_data.index)

# pangolin and scorpio versions
# column names used:
# version,pangolin_version,scorpio_version,constellation_version
assert res_data.shape[0] > 0
self.pangolin_version = res_data.iloc[0]['pangolin_version']
self.pangolindata_version = res_data.iloc[0]['version'].split('-',1)[-1].split('v',1)[-1]
self.scorpio_version = res_data.iloc[0]['scorpio_version']
self.scorpio_constellations_version = res_data.iloc[0]['constellation_version']


# get data
self.add_column_raw('pangolin_lineage', res_data['lineage'])
self.add_column_raw('pangolin_conflict', res_data['conflict'])


res_data['lineage_conflict'] = [f'<b>{l}</b><br>({p if pd.notnull(p) else "-"})' for l,p in zip(res_data['lineage'], res_data['conflict'])]
colname = 'Lineage<br>(conflict)'

Expand All @@ -399,13 +390,16 @@ def add_pangolin_results(self, pangolin_results):
res_data.at[row, 'lineage_conflict'] += f'<br><font color="{color}"><b>{var_status}</b></font>'

self.add_column_raw('variant_status', res_data['variant_status'])
self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the <a href="{args.variants_table}">variants table</a> of <a href="https://github.com/3dgiordano/SARS-CoV-2-Variants">SARS-CoV-2-Variants</a>.')
self.add_col_description(f'Variant type (VOC, VOI, etc.) was determined from the <a href="{args.variants_table}">variants table</a> ' + \
'of <a href="https://github.com/3dgiordano/SARS-CoV-2-Variants">SARS-CoV-2-Variants</a>.')


self.add_column(colname, res_data['lineage_conflict'])
if self.pangolin_version is None or self.pangolearn_version is None:
error('No pangolin/pangoLEARN versions were added before adding pangolin results.')
self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with <a href="https://cov-lineages.org/pangolin.html">Pangolin</a> (v{self.pangolin_version} using <a href="https://cov-lineages.org/resources/pangolin/pangolearn.html">PangoLEARN</a> data release {self.pangolearn_version}).')
if self.pangolin_version is None or self.pangolindata_version is None:
error('No pangolin/pangolin-data versions were added before adding pangolin results.')
self.add_col_description(f'Lineage and the corresponding tree resolution conflict measure were determined with ' + \
f'<a href="https://cov-lineages.org/pangolin.html">Pangolin</a> (v{self.pangolin_version} using ' + \
f'<a href="https://cov-lineages.org/resources/pangolin/requirements.html">pangolin-data</a> v{self.pangolindata_version}).')

# Add scorpio info if any is present
if res_data['scorpio_call'].notna().any():
Expand All @@ -419,7 +413,10 @@ def add_pangolin_results(self, pangolin_results):
self.add_column('Constellation<br>(conflict)', res_data['scorpio_conflict'])
if self.scorpio_version is None or self.scorpio_constellations_version is None:
error('No Scorpio/constellations versions were added before adding Pangolin results.')
self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with <a href="https://github.com/cov-lineages/scorpio">Scorpio</a> (v{self.scorpio_version} using <a href="https://cov-lineages.org/constellations.html">Constellations</a> version {self.scorpio_constellations_version}).')
self.add_col_description(f'Constellation and the corresponding tree resolution conflict measure were determined with ' + \
f'<a href="https://github.com/cov-lineages/scorpio">Scorpio</a> (v{self.scorpio_version} using ' + \
f'<a href="https://cov-lineages.org/constellations.html">Constellations</a> version {self.scorpio_constellations_version}).')



def add_president_results(self, president_results):
Expand Down Expand Up @@ -505,6 +502,13 @@ def add_nextclade_results(self, nextclade_results):
self.add_column_raw('nextclade_insertions', res_data['aaInsertions'])
self.add_column_raw('nextclade_frameshifts', res_data['frameShifts'])

# private mutation information
self.add_column_raw('nextclade_privateNucMutations_reversion', res_data["privateNucMutations.reversionSubstitutions"])
self.add_column_raw('nextclade_privateNucMutations_labeled', res_data["privateNucMutations.labeledSubstitutions"])
self.add_column_raw('nextclade_privateNucMutations_unlabeled', res_data["privateNucMutations.unlabeledSubstitutions"])
self.add_column_raw('nextclade_privateNucMutations_qc_status', res_data["qc.privateMutations.status"])


res_data['mutations_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaSubstitutions']]
res_data['deletions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaDeletions']]
res_data['insertions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaInsertions']]
Expand Down Expand Up @@ -560,7 +564,8 @@ def get_markup_with_toggle_and_tag(tag):

if self.nextclade_version is None or self.nextcladedata_version is None:
error('No nextclade/nextcladedata versions were added before adding nextclade results.')
self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with <a href="https://clades.nextstrain.org/">Nextclade</a> (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).')
self.add_col_description(f'Clade, mutations, deletions, insertions and frameshifts were determined with ' + \
f'<a href="https://clades.nextstrain.org/">Nextclade</a> (v{self.nextclade_version} using nextclade data release {self.nextcladedata_version}).')



Expand Down Expand Up @@ -782,16 +787,13 @@ def get_lineage_status(self, lineage):
log('Started summary_report.py ...')

parser = argparse.ArgumentParser(description='Generate a summary report for multiple samples run with poreCov')
parser.add_argument("-v", "--version_config", help="version config", required=True)
parser.add_argument("--scorpio_version", help="scorpio version", required=True)
parser.add_argument("--scorpio_constellations_version", help="scorpio constellations version", required=True)
parser.add_argument("-v", "--version_config", help="version config", required=True)
parser.add_argument("--variants_table", help="variants table with VOCs, VOIs etc.", required=True)
parser.add_argument("--porecov_version", help="porecov version", required=True)
parser.add_argument("--guppy_used", help="guppy used")
parser.add_argument("--guppy_model", help="guppy model")
parser.add_argument("--medaka_model", help="medaka model")
parser.add_argument("--nf_commandline", help="full nextflow command call", required=True)
parser.add_argument("--pangolin_docker", help="pangolin/pangoLEARN version", required=True)
parser.add_argument("--nextclade_docker", help="nextclade/nextcladedata version", required=True)
parser.add_argument("--primer", help="primer version")
parser.add_argument("-p", "--pangolin_results", help="pangolin results")
Expand All @@ -806,9 +808,7 @@ def get_lineage_status(self, lineage):
### build report
report = SummaryReport()
report.parse_version_config(args.version_config)
report.parse_scorpio_versions(args.scorpio_version, args.scorpio_constellations_version)
report.parse_variants_table(args.variants_table)
report.parse_pangolin_version(args.pangolin_docker)
report.parse_nextclade_version(args.nextclade_docker)


Expand Down
17 changes: 0 additions & 17 deletions modules/get_scorpio_version.nf

This file was deleted.

2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ params {
update = false
screen_reads = false
screen_reads_plot_cutoff = 0.03
defaultpangolin = 'nanozoo/pangolin:3.1.20--2022-02-28'
defaultpangolin = 'nanozoo/pangolin-v4:4.0--1.2.133'
defaultnextclade = 'nanozoo/nextclade:1.10.3--2022-02-07'

// parameters
Expand Down
8 changes: 3 additions & 5 deletions workflows/create_summary_report.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
include { summary_report; summary_report_fasta; summary_report_default } from './process/summary_report'
include { plot_coverages } from '../modules/plot_coverages.nf'
include { get_scorpio_version } from '../modules/get_scorpio_version.nf'
include { get_variants_classification } from '../modules/get_variants_classification.nf'
include { lcs_plot } from './process/lcs_sc2'

Expand All @@ -16,7 +15,6 @@ workflow create_summary_report_wf {

main:
version_ch = Channel.fromPath(workflow.projectDir + "/configs/container.config")
scorpio_ver_ch = get_scorpio_version()
variants_table_ch = get_variants_classification()

pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true)
Expand All @@ -27,16 +25,16 @@ workflow create_summary_report_wf {
alignment_files = alignments.map {it -> it[0]}.collect()
if (params.fasta || workflow.profile.contains('test_fasta')) {

summary_report_fasta(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results)
summary_report_fasta(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results)

} else {
kraken2_results = kraken2.map {it -> it[2]}.collect()
// sort by sample name, group in lists of 6, collect the grouped plots
coverage_plots = plot_coverages(alignments.map{it -> it[0]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6), \
alignments.map{it -> it[1]}.toSortedList({ a, b -> a.simpleName <=> b.simpleName }).flatten().collate(6)).collect()

if (params.samples) { summary_report(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
else { summary_report_default(version_ch, scorpio_ver_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }
if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }

}

Expand Down
12 changes: 0 additions & 12 deletions workflows/process/summary_report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ process summary_report {

input:
path(version_config)
tuple val(scorpio_ver), val(scorpio_constellations_ver)
path(variants_table)
path(pangolin_results)
path(president_results)
Expand All @@ -32,16 +31,13 @@ process summary_report {

summary_report.py \
-v !{version_config} \
--scorpio_version "!{scorpio_ver}" \
--scorpio_constellations_version "!{scorpio_constellations_ver}" \
--variants_table !{variants_table} \
--porecov_version !{workflow.revision}:!{workflow.commitId}:!{workflow.scriptId} \
--nextclade_docker !{params.nextcladedocker} \
--guppy_used !{guppyused} \
--guppy_model !{params.guppy_model} \
--medaka_model !{params.medaka_model} \
--nf_commandline '!{workflow.commandLine}' \
--pangolin_docker !{params.pangolindocker} \
--primer !{params.primerV} \
-p !{pangolin_results} \
-q !{president_results} \
Expand All @@ -63,7 +59,6 @@ process summary_report_default {

input:
path(version_config)
tuple val(scorpio_ver), val(scorpio_constellations_ver)
path(variants_table)
path(pangolin_results)
path(president_results)
Expand All @@ -89,15 +84,12 @@ process summary_report_default {

summary_report.py \
-v !{version_config} \
--scorpio_version "!{scorpio_ver}" \
--scorpio_constellations_version "!{scorpio_constellations_ver}" \
--variants_table !{variants_table} \
--porecov_version !{workflow.revision}:!{workflow.commitId}:!{workflow.scriptId} \
--guppy_used !{guppyused} \
--guppy_model !{params.guppy_model} \
--medaka_model !{params.medaka_model} \
--nf_commandline '!{workflow.commandLine}' \
--pangolin_docker !{params.pangolindocker} \
--nextclade_docker !{params.nextcladedocker} \
--primer !{params.primerV} \
-p !{pangolin_results} \
Expand All @@ -117,7 +109,6 @@ process summary_report_fasta {
label 'fastcov'
input:
path(version_config)
tuple val(scorpio_ver), val(scorpio_constellations_ver)
path(variants_table)
path(pangolin_results)
path(president_results)
Expand All @@ -131,12 +122,9 @@ process summary_report_fasta {
"""
summary_report.py \
-v ${version_config} \
--scorpio_version "${scorpio_ver}" \
--scorpio_constellations_version "${scorpio_constellations_ver}" \
--variants_table ${variants_table} \
--porecov_version ${workflow.revision}:${workflow.commitId}:${workflow.scriptId} \
--nf_commandline '${workflow.commandLine}' \
--pangolin_docker ${params.pangolindocker} \
--nextclade_docker ${params.nextcladedocker} \
-p ${pangolin_results} \
-q ${president_results} \
Expand Down