From 54f65fe29a0ee106445922a5f8ff0eb3febe7d87 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Fri, 18 Aug 2023 15:10:19 +0200
Subject: [PATCH 01/10] add R script to generate more output plots (some
 modifications from original to fix bugs and make it run)

---
 bin/cigar_parser.R                 |   3 +-
 bin/plotter.R                      | 665 +++++++++++++++++++++++++++++
 conf/modules.config                |   4 +
 modules/local/cigar_parser.nf      |   2 +-
 modules/local/crisprseq_plotter.nf |  42 ++
 workflows/crisprseq_targeted.nf    |  19 +-
 6 files changed, 727 insertions(+), 8 deletions(-)
 create mode 100755 bin/plotter.R
 create mode 100644 modules/local/crisprseq_plotter.nf

diff --git a/bin/cigar_parser.R b/bin/cigar_parser.R
index 6bb972ca..2018d686 100755
--- a/bin/cigar_parser.R
+++ b/bin/cigar_parser.R
@@ -946,7 +946,8 @@ if (dim(alignment_info)[1] != 0){
         dels <- separated_indels %>% filter(Modification == "del")
         dels_count <- dim(dels)[1]
         if ( t_type == "dels-out" || t_type == "dels-in"){
-            dels_count <- dels_count - t_reads
+            t_in_dels <- dels %>% filter(Ids %in% t_ids[[1]])
+            dels_count <- dels_count - dim(t_in_dels)[1]
         }
         ins <- separated_indels %>% filter(Modification == "ins")
         ins_count <- dim(ins)[1]
diff --git a/bin/plotter.R b/bin/plotter.R
new file mode 100755
index 00000000..10ab023d
--- /dev/null
+++ b/bin/plotter.R
@@ -0,0 +1,665 @@
+#!/usr/bin/env Rscript
+
+############################
+#### Plot editing results
+#### author: Marta Sanvicente
+#### modified by: Júlia Mir @mirpedrol
+#### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text.
+#### Original code https://bitbucket.org/synbiolab/crispr-a_nextflow/src/master/bin/get_plots.R
+############################
+
+args = commandArgs(trailingOnly=TRUE)
+
+library(ggplot2)
+library(plyr)
+library(dplyr)
+library(seqinr)
+library(ShortRead)
+library(ggpubr)
+library(ggmsa)
+library(seqmagick)
+library(stringr)
+library(tidyr)
+library(ggseqlogo)
+library(plotly)
+library(cowplot)
+library(optparse)
+
+####################################
+### Load command line arguments ####
+####################################
+
+option_list = list(
+    make_option(c("-i", "--indels_info"), type="character", default=NULL,
+        help="CSV with information of INDEL editions", metavar="character"),
+    make_option(c("-r", "--reference"), type="character", default=NULL,
+        help="Reference fasta file", metavar="character"),
+    make_option(c("-g", "--gRNA_sequence"), type="character", default=NULL,
+        help="gRNA sequence", metavar="character"),
+    make_option(c("-n", "--sample_name"), type="character", default=NULL,
+        help="Sample ID", metavar="character"),
+    make_option(c("-c", "--cut_site"), type="numeric", default=NULL,
+        help="Cut position", metavar="numeric"),
+    make_option(c("-s", "--substitutions_info"), type="character", default=NULL,
+        help="CSV with information of substitution editions", metavar="character")
+);
+
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+
+sample_name <- opt$sample_name
+indels_info <- opt$indels_info
+reference <- opt$reference
+gR <- opt$gRNA_sequence
+substitutions_info <- opt$substitutions_info
+rel_cut_site <- as.numeric(opt$cut_site)
+
+data <- read.csv(indels_info)
+ref_seq <- readFasta(reference)
+subs_plup <- read.csv(substitutions_info, row.names = 1)
+
+
+######################
+##### CRISPR-GA-1 like plot https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4184265/
+######################
+
+plotIndels_gg <- function(indels_data, cut){
+    # Split insertions from deletions
+    l <- length(indels_data$Modification) + indels_data$wt_reads[1] + data$t_reads[1]
+    dels <- indels_data %>% filter(Modification == "del")
+    ins <- indels_data %>% filter(Modification == "ins")
+
+    #Metadata deletion
+    if (length(dels$X)>0){
+        metadels <- as.data.frame(table(dels$Start,dels$Length))
+        metadels <- metadels[which(metadels$Freq>0),]
+        colnames(metadels) <- c("Start","length","Freq")
+        metadels$End <- 0
+        metadels$changes <- sum((as.integer(as.character(metadels$Freq)) * as.integer(as.character(metadels$length)))) #Now the changes column corrspond to a constant that will be used in the creation of accumulative. This variable stores the total number of changes per position
+
+    for (i in 1:length(metadels$Start)){
+        metadels$End[i] = as.integer(as.character(metadels$Start[i])) + as.integer(as.character(metadels$length[i])) - 1
+    }
+    }else{
+        metadels <- list()
+    }
+
+    exportJson <- jsonlite::toJSON(metadels)
+    write(exportJson, paste(sample_name,"_metadels.json",sep=""))
+
+    #Metadata insertion
+    if(length(ins$X)>0){
+        metains <- as.data.frame(table(ins$Start,ins$Length))
+        metains <- metains[which(metains$Freq>0),]
+        colnames(metains) <- c("Start","length","Freq")
+        metains$End <- 0
+        metains$changes <- sum((as.integer(as.character(metains$Freq)) * as.integer(as.character(metains$length)))) #Now the changes column corrspond to a constant that will be used in the creation of accumulative. This variable stores the total number of changes per position
+
+    for (i in 1:length(metains$Start)){
+        metains$End[i] = as.integer(as.character(metains$Start[i])) + as.integer(as.character(metains$length[i])) - 1
+    }
+    }else{
+        metains <- list()
+    }
+    exportJson <- jsonlite::toJSON(metains)
+    write(exportJson, paste(sample_name,"_metains.json",sep=""))
+
+
+    # Count accumulated insertions and deletions by position
+    accum.dels=c()
+    accum.dels=unlist(apply(dels,1,function(X) return( seq(as.integer(X[3]), as.integer(X[3]) + as.integer(X[4])) )))
+    accum.ins=c()
+    accum.ins=unlist(apply(ins,1,function(X) return( seq(as.integer(X[3]), as.integer(X[3]) + as.integer(X[4])) )))
+
+    # Plot
+    a_dels <- data.frame(location = c(accum.dels))
+    a_ins <- data.frame(location = c(accum.ins))
+
+
+    ###############################################
+    #         ACUMMULATIVE LOCATIONS              #
+    ###############################################
+    if ( dim(dels)[1] != 0 ){
+        acc_dels <- as.data.frame(table(a_dels))
+        acc_dels <- acc_dels[acc_dels$Freq>0,]
+        acc_dels$percent <- (acc_dels$Freq/sum(acc_dels$Freq))*100
+        del_acumm <- plot_ly(y = ~acc_dels$percent, x= ~as.numeric(as.character(acc_dels$location)),type = "bar",hovertemplate = paste('Percentage: %{y}','Counts: ',acc_dels$Freq)) %>%
+            layout(xaxis = list(title="Accumulative Deletion Location",rangeslider = list()), yaxis = list(fixedrange = FALSE,title = "Reads Percentage(%)"), barmode = 'stack')%>%
+            layout(shapes = list(list(
+                type = "line",
+                y0 = 0,
+                y1 = 100,
+                yref = "paper",
+                x0 = cut_site,
+                x1 = cut_site,
+                line = list(color = "red", dash="dot")
+            )))%>%
+            add_text(showlegend = FALSE, x=cut_site, y=max(acc_dels$percent),text = "Cut site", hovertemplate = paste("Cut site ", cut_site),
+                    textfont = list(color= c("red")))
+    }else {
+        acc_dels <- data.frame()
+    }
+
+
+    if ( dim(ins)[1] != 0 ){
+        acc_ins <- as.data.frame(table(a_ins))
+        acc_ins <- acc_ins[acc_ins$Freq>0,]
+        acc_ins$percent <- (acc_ins$Freq/sum(acc_ins$Freq))*100
+
+        ins_acumm <- plot_ly(y = ~acc_ins$percent, x= ~as.numeric(as.character(acc_ins$location)),type = "bar",hovertemplate = paste('Percentage: %{y}','Counts: ',acc_ins$Freq)) %>%
+            layout(xaxis = list(title="Accumulative Insertion Location",rangeslider = list()), yaxis = list(fixedrange = FALSE,title = "Reads Percentage(%)"), barmode = 'stack')%>%
+            layout(shapes = list(list(
+                type = "line",
+                y0 = 0,
+                y1 = 100,
+                yref = "paper",
+                x0 = cut_site,
+                x1 = cut_site,
+                line = list(color = "red", dash="dot")
+            )))%>%
+            add_text(showlegend = FALSE, x=cut_site, y=max(acc_ins$percent),text = "Cut site", hovertemplate = paste("Cut Site ", cut_site),
+                    textfont = list(color= c("red")))
+    }else {
+        acc_ins <- data.frame()
+    }
+
+    if( (dim(ins)[1] != 0) &&  (dim(dels)[1] != 0) ){
+        accummulative <- subplot(del_acumm, ins_acumm, shareY = T,shareX = F,titleX = T,titleY = T)
+    } else if( dim(ins)[1] != 0 ){
+        accummulative <- subplot(ins_acumm, shareY = T,shareX = F,titleX = T,titleY = T)
+    } else {
+        accummulative <- subplot(del_acumm, shareY = T,shareX = F,titleX = T,titleY = T)
+    }
+
+    #Elements for dynamic table
+    sequece_length <- max(indels_data$Start+indels_data$Length)
+    exportJson <- jsonlite::toJSON(sequece_length)
+    write(exportJson, paste(sample_name,"_length.json",sep=""))
+
+    exportJson <- jsonlite::toJSON(l)
+    write(exportJson, paste(sample_name,"_Total_reads.json",sep=""))
+
+
+    #INSERTIONS
+    ###############################################
+    #         LOCATION                            #
+    ###############################################
+
+    pattern = ins
+
+    #defensive programming against file with no insertions
+    if(length(pattern$Modification)>0){
+
+        LOC <- as.data.frame(table(pattern$Start,pattern$Length,pattern$ins_nt))
+        LOC <- LOC[which(LOC$Freq>0),]
+        LOC$percent <- (LOC$Freq/l)*100
+
+
+        #AXIS -> PERCENTAGE, BAR -> COUNT
+        Ins_location <- plot_ly() %>%
+            layout(xaxis = list(title="Insertion Location",rangeslider = list()), yaxis = list(fixedrange = FALSE,title = "Reads Percentage(%)"), barmode = 'stack')
+
+        Ins_location <- Ins_location %>% add_trace(x = ~as.numeric(as.character(LOC$Var1)),
+                                                    y= ~LOC$percent, color = ~LOC$Var3, type = 'bar',
+                                                    marker = list(~LOC$Freq), legendgroup = ~LOC$Var3,
+                                                    hovertemplate = paste('Percentage: %{y}','Counts: ',LOC$Freq))
+
+
+        Ins_location <- Ins_location %>%   layout(shapes = list(list(
+            type = "line",
+            y0 = 0,
+            y1 = 100,
+            yref = "paper",
+            x0 = cut_site,
+            x1 = cut_site,
+            line = list(color = "red", dash="dot")
+        ))) %>% add_text(showlegend = FALSE, x=cut_site,y=max(LOC$percent),text = "Cut site", hovertemplate = paste("Cut site ", cut_site),
+                        textfont = list(color= c("red")))
+
+
+        ###############################################
+        #                SIZES                        #
+        ###############################################
+
+        #AXIS -> PERCENTAGE, BAR -> COUNT
+        Ins_sizes <- plot_ly() %>%
+            layout(xaxis = list(title="Insertion Sizes",rangeslider = list()), yaxis = list(title = "Reads Percentage(%)"), barmode = 'stack')
+        Ins_sizes <- Ins_sizes %>% add_trace(x = ~as.numeric(as.character(LOC$Var2)),
+                                            y= ~LOC$percent,  color = ~LOC$Var3, type = 'bar',
+                                            marker = list(~LOC$Freq), showlegend = F, legendgroup = ~LOC$Var3,
+                                            hovertemplate = paste(' Percentage: %{y}','Count: ',LOC$Freq))
+
+        ###############################################
+        #                MERGE PLOTS                  #
+        ###############################################
+
+        insertions <- subplot(Ins_location,Ins_sizes, titleY = F, titleX = T) %>% layout(title = "Insertions",yaxis = list(title = "Reads Percentage (%)"))
+    }else{
+        insertions<-empty_plot("No Insertions were detected.")
+    }
+
+    #DELETIONS
+    #pattern filter (A,C,T,G) -> NHEJ
+    pattern <- dels
+
+    #defensive programming against file with no insertions
+    if(length(pattern$Modification)){
+
+        for(i in 1:length(pattern$patterns)){
+            if(length(strsplit(pattern$patterns[i],split = "")[[1]])==1){
+                pattern$patterns[i] <- "NHEJ"
+            }
+        }
+
+        ###############################################
+        #         LOCATION                            #
+        ###############################################
+
+
+        LOC <- as.data.frame(table(pattern$Start,pattern$patterns,pattern$Length))
+        LOC <- LOC[which(LOC$Freq>0),]
+        LOC$percent <- (LOC$Freq/l)*100
+
+
+        #AXIS -> PERCENTAGE, BAR -> COUNT
+        Del_location <- plot_ly() %>%
+        layout(xaxis = list(title="Deletion Location",rangeslider = list()), yaxis = list(fixedrange = FALSE,title = "Reads Percentage(%)"), barmode = 'stack')
+        Del_location <- Del_location %>% add_trace(x = ~as.numeric(as.character(LOC$Var1)),
+                                                y= ~LOC$percent, color = ~LOC$Var2, type = 'bar',
+                                                marker = list(~LOC$Freq),  legendgroup = ~LOC$Var2,
+                                                hovertemplate = paste('Percentage: %{y}','Counts: ',LOC$Freq))
+
+        Del_location <- Del_location %>%   layout(shapes = list(list(
+            type = "line",
+            y0 = 0,
+            y1 = 100,
+            yref = "paper",
+            x0 = cut_site,
+            x1 = cut_site,
+            line = list(color = "red", dash="dot")
+        )))%>%
+        add_text(showlegend = FALSE, x=cut_site,y=max(LOC$percent),text = "Cut site", hovertemplate = paste("Cut site ", cut_site),
+                textfont = list(color= c("red")))
+
+
+        ###############################################
+        #                SIZES                        #
+        ###############################################
+
+        #AXIS -> PERCENTAGE, BAR -> COUNT
+        Del_sizes <- plot_ly() %>%
+        layout(xaxis = list(title="Deletion Sizes",rangeslider = list()), yaxis = list(title = "Reads Percentage(%)"), barmode = 'stack')
+        Del_sizes <- Del_sizes %>% add_trace(x = ~as.numeric(as.character(LOC$Var3)),
+                                            y= ~LOC$percent, color = ~LOC$Var2, type = 'bar',
+                                            marker = list(~LOC$Freq), showlegend=F, legendgroup = ~LOC$Var2,
+                                            hovertemplate = paste('Size: ',LOC$Var3,' Percentage: %{y}'))
+
+
+        ###############################################
+        #                MERGE PLOTS                  #
+        ###############################################
+
+        deletions <- subplot(Del_location,Del_sizes, titleY = F, titleX = T) %>% layout(title = "Deletions",yaxis = list(title = "Reads Percentage (%)"))
+    }else{
+        deletions<-empty_plot("No deletions were detected.")
+    }
+    plots <- list(deletions,insertions,accummulative)
+    return(plots)
+}
+
+#######
+#### Reverse complement
+#######
+strComp=function(X){
+    return(c2s(rev(comp(s2c(X)))))
+}
+
+######
+### Cut site
+######
+get_cutSite <- function(gRNA_seq, reference, rel_cut){
+    ### gRNA seq to upper case
+    gRNA_seq <- toupper(gRNA_seq)
+    ### Check orientation of gRNA in reference sequence and cut site
+    rvComp_seq <- strComp(as.character(sread(reference)[[1]]))
+    align <- pairwiseAlignment(toupper(gRNA_seq), toupper(as.character(sread(reference)[[1]])), type="global-local")
+    alignRevComp <- pairwiseAlignment(toupper(gRNA_seq), toupper(rvComp_seq), type="global-local")
+
+    if (score(align) > score(alignRevComp)){
+        if (rel_cut > 0){
+            cut_site <- start(subject(align))+rel_cut-1
+        } else {
+            cut_site <- start(subject(align)) + ( nchar(gRNA_seq) + rel_cut ) -1
+        }
+    } else {
+        if (rel_cut > 0){
+            cut_site <- nchar(as.character(sread(reference)[[1]])) - end(subject(alignRevComp)) + (nchar(gRNA_seq)-rel_cut)
+        } else {
+            cut_site <- nchar(as.character(sread(reference)[[1]])) - end(subject(alignRevComp)) - rel_cut
+        }
+    }
+    return(cut_site)
+}
+
+
+#######
+#### Empty Plots
+#######
+empty_plot <- function(title = NULL){
+    p <- plotly_empty(type = "scatter", mode = "markers") %>%
+        config(
+            displayModeBar = FALSE
+        ) %>%
+        layout(
+            title = list(
+            text = title,
+            yref = "paper",
+            y = 0.5
+            )
+        )
+    return(p)
+}
+
+######################
+##### Alleles percentages plot
+######################
+
+indel_count_seq <- function(indel_data){
+    # Get table with percentages of indels among all genotypes
+    indel_count <- indel_data %>% group_by(Start, Length, Modification) %>% dplyr::summarise(freq=n())
+    #indel_count <- plyr::count(indel_data, vars = c("Start", "Length", "Modification"))
+    indel_count <- indel_count[order(indel_count$freq, decreasing = TRUE),]
+    indel_count$Perc <- (indel_count$freq/sum(indel_count$freq)) * 100
+    return(as.data.frame(indel_count))
+}
+
+get_sequences <- function(indels, ref_seq){
+    edited_sequences <- c()
+    seqs <- lapply(c(1:dim(indels)[1]), function(num) {
+        ref_seq_test <- as.character(sread(ref_seq))
+        if (indels$Modification[[num]] == "del"){
+            substr(ref_seq_test, indels$Start[[num]], indels$Start[[num]]+indels$Length[[num]]-1) <- paste0(rep("-", indels$Length[[num]]), collapse = "")
+            edited_sequences <- c(edited_sequences, ref_seq_test)
+        } else {
+            seq <- paste(substring(ref_seq_test, 1, indels$Start[[num]]), paste0(rep("N", indels$Length[[num]]), collapse = ""), substring(ref_seq_test, indels$Start[[num]]+1, nchar(ref_seq_test)), sep = "")
+            edited_sequences <- c(edited_sequences, seq)
+        }
+    })
+}
+
+######################
+##### Substitutions at -+25 of the gRNA plot
+######################
+subs_plot <- function(subsperc, gRNA_seq, cut_site){
+    ### Get the gRNA region
+    pre_cut_site <- cut_site - (nchar(gRNA_seq)-3)  ## insted of 17 to allow different gRNA lengths instead of only 20
+    post_cut_site <- cut_site + 3 + 1
+    ### gRNA nucleotides to use them in the axis
+    ref_nt <- stringr::str_split(gRNA_seq, "")[[1]]
+    ### Get the plot
+    plot <- ggplot(data=subsperc %>% filter(pos > pre_cut_site - 25) %>% filter(pos < post_cut_site + 25), aes(x=ordered(pos), y=percentage, fill=nucleotide, alpha=ifelse(percentage<95,1,0))) +
+        geom_bar(stat="identity") + theme_classic() + scale_fill_manual("Nuclotides", values = c("A" = "#109648", "C" = "#86b7ed", "G" = "#f7b32b", "T" = "#d62839", "-" = "#f2f2f2")) +
+        scale_x_discrete(breaks = (pre_cut_site+1):(post_cut_site-1), labels = ref_nt) + xlab(NULL) + scale_alpha(guide = 'none') + ylab('nt (%)') +
+        geom_text(aes(label = ifelse(percentage<50 & percentage>5,percentage,"")),
+                hjust = 0, vjust = 1.5, angle = 90, nudge_x = -.5,
+                size = 2.5) +
+        theme(text = element_text(size = 11), legend.position = "bottom")
+    return(plot)
+}
+
+############
+### Substitutions logo plot
+############
+subs_logo_plot <- function(subsperc, gRNA_seq, cut_site){
+    ### Positions related to the cut site
+    pre_cut_site <- cut_site - (nchar(gRNA_seq)-3)
+    post_cut_site <- cut_site + 6 + 1
+    #
+    ### From percentages table to matrix
+    fper_filtered <- subsperc %>% filter(pos > pre_cut_site) %>% filter(pos < post_cut_site) %>% filter(nucleotide != "-")
+    nuc_pos <- pivot_wider(fper_filtered, names_from = pos, values_from = percentage)
+    nuc_pos[is.na(nuc_pos)] <- 0
+    nuc_pos[nuc_pos == 100] <- 0
+    nuc_pos_matrix <- data.matrix(nuc_pos[,-1])
+    nuc_pos_matrix <- nuc_pos_matrix+0.00001 ### This is done to aboid problemes in case of a matrix full of 0s
+    rownames(nuc_pos_matrix) <- nuc_pos$nucleotide
+    #
+    ### gRNA nucleotides to use them in the axis
+    ref_nt <- c(str_split(gRNA_seq, "")[[1]], "N", "G", "G")
+    logo <- ggseqlogo(nuc_pos_matrix, method='custom', seq_type='dna') + ylab('nt (%)')
+    logo$scales$scales[[1]] <- scale_x_continuous(breaks= 1:length(ref_nt),labels=ref_nt)
+    logo_plot <- logo + annotate('rect', xmin = length(ref_nt)-2.5, xmax = length(ref_nt)+0.5, ymin = -5, ymax = 105, alpha = .1, col='black', fill='yellow')
+    return(logo_plot)
+}
+
+##################
+### Top variants plots
+##################
+
+### Logo plot of top indels
+get_logo_top_vars <- function(selected_reference, change_len, pattern_indel, modification, str_pos, cut_site){
+    # This function is used to generate the logo of the alleles that apperar most times in data.
+    all_pre_nt <- list(A = "", C = "", T = "", G = "")
+    for (j in c("A", "C", "G", "T")){
+        per_nt <- lapply(1:length(selected_reference), {
+        function(i){
+            if(i < 11 || i > 10+l || modification == "ins"){
+                if(selected_reference[i] == j){
+                    return(1-0.00001)
+                } else {
+                    return(0.00001)
+                }
+            } else {
+                return(0.00001)
+            }
+        }
+        })
+    all_pre_nt[[j]] <- per_nt
+    }
+    ## Data frame with the probability of each position
+    df<-NULL
+    df <- rbind(df, unlist(all_pre_nt$A))
+    df <- rbind(df, unlist(all_pre_nt$C))
+    df <- rbind(df, unlist(all_pre_nt$G))
+    df <- rbind(df, unlist(all_pre_nt$T))
+    pos_matrix <- data.matrix(df)
+    rownames(pos_matrix) <- c("A", "C", "G", "T")
+    if(modification == "ins"){
+        cut_correction <- change_len
+    } else {
+        cut_correction <- 0
+    }
+
+    logo <- ggseqlogo(pos_matrix, method='custom', seq_type='dna') + ylab('') +
+        theme(plot.title = element_text(size = 12 , face = "bold" , hjust = 0.5),
+            axis.text.x = element_blank(),
+            axis.text.y = element_blank()) +
+        ggtitle(paste0(l, "nt ", mod, "; Start: ", str_pos)) +
+        annotate('segment', x = cut_site-str_pos+11+0.5+cut_correction , xend=cut_site-str_pos+11+0.5+cut_correction, y=-0.08, yend=1.08, linewidth=1, color="red")
+
+    if (is.na(pattern_indel) && modification != "ins"){
+        return(logo)
+    } else if (modification == "ins"){
+        logo_plot <- logo + annotate('rect', xmin = 11-0.5, xmax = 10+change_len+0.5, ymin = -0.05, ymax = 1.05, alpha = .1, col='black', fill='yellow')
+        return(logo_plot)
+    } else if (pattern_indel == "NHEJ"){
+        return(logo)
+    } else {
+        mh_len <- nchar(pattern_indel)
+        logo_plot <- logo + annotate('rect', xmin = 11+change_len-0.5, xmax = 10+change_len+mh_len+0.5, ymin = -0.05, ymax = 1.05, alpha = .1, col='black', fill='yellow')
+        return(logo_plot)
+    }
+}
+
+
+
+##################
+### Get all plots
+##################
+
+#load(subs_info)
+#empty data troubleshooter, notice that	3 is choose to avoid counting small empty df product of	read.csv function
+a<-as.character(data)
+sampleName<-gsub('.{11}$', '', a)
+checkFaulty<-grep('Faulty', sampleName)
+a<-as.character(indels_info)
+a<-gsub('.{11}$', '', a)
+checkEmpty<-grep('Empt', a)
+checkFaulty<-grep('Badl', a)
+if (dim(data)[2]>3 && length(checkFaulty) == 0 && length(checkEmpty) == 0){ ### Substitutions percentages plot
+    cut_site <- get_cutSite(gR, ref_seq, rel_cut_site)
+    if (dim(subs_plup)[1] == 0){
+        system(paste0("touch ", sample_name, "_subs-perc_plot.png"))
+    } else {
+        sp <- subs_plot(subs_plup, gR, cut_site)
+        ggsave(paste0(sample_name, "_subs-perc_plot.png"), width = 12, height = 1.5)
+        ### Substitutions logo plot
+        sp_logo <- subs_logo_plot(subs_plup, gR, cut_site)
+        ggsave(paste0(sample_name, "_subs-perc_plot_LOGO.png"),  width = 12, height = 1.5)
+    }
+
+    ### Counts plot
+    #GET THE HTMLS
+    figure_counts <- plotIndels_gg(indels_data = data, cut =  cut_site)
+    htmlwidgets::saveWidget(as_widget(figure_counts[[1]]), paste0(sample_name,"_Deletions.html"))
+    htmlwidgets::saveWidget(as_widget(figure_counts[[2]]), paste0(sample_name,"_Insertions.html"))
+    htmlwidgets::saveWidget(as_widget(figure_counts[[3]]), paste0(sample_name,"_accumulative.html"))
+
+    ### Alleles plot (deletions)
+    indel_count <- indel_count_seq(data)
+    dels_count <- indel_count %>% filter(Modification == "del") ## Just deletions
+    if (dim(dels_count)[1] != 0){
+        dels_count$Start <- as.numeric(as.character(dels_count$Start))
+        dels_count$Length <- as.numeric(as.character(dels_count$Length))
+        seqs <- get_sequences(dels_count, ref_seq)
+
+        # Write fasta with the sequences and use it to get the alleles plot
+        if (length(seqs) < 10){
+            rep_seqs <- seqs[1:length(seqs)]
+        } else {
+            rep_seqs <- seqs[1:10]
+        }
+        write.fasta(sequences = rep_seqs, names = paste0(round(dels_count$Perc, 2), "_", dels_count$Modification, dels_count$Length, "_", seq(1, dim(dels_count)[1]))[1:10], file.out = paste0(sample_name, "_top10.fa"))
+        nt_sequences <- paste0(sample_name, "_top10.fa")
+        figure_alleles <- ggmsa(nt_sequences, cut_site-40, cut_site+40, color = "Chemistry_NT", seq_name = TRUE)
+        ggsave(paste0(sample_name, "_delAlleles_plot.png"))
+    } else {
+        system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
+    }
+
+  ######### Low variability or top variants plot
+  wt <- data$wt_reads[1] ### 7299
+  templata_based <- data$t_reads[1] ### 0
+  total_char <- wt + templata_based + dim(data)[1]
+
+  delCols_indels <- data %>% group_by(Modification, Start, Length, ins_nt, patterns) %>% dplyr::summarize(freq = n())
+  unique_variants <- rbind(as.data.frame(delCols_indels), c("wt", 0, 0, NA, NA, wt), c("template-based", 0, 0, NA, NA, templata_based))
+  uniq_indels_sorted <- unique_variants[order(as.numeric(unique_variants$freq), decreasing = TRUE),]
+  write.csv(uniq_indels_sorted,file=paste0(sample_name, "_unique-variants.csv"))
+
+  # Check if there are enogh indels to have 5 top
+  if(dim(uniq_indels_sorted)[1] < 5 ){
+    num_top <- dim(delCols_indels)[1]
+  } else { num_top <- 5 }
+
+  # Get to variants
+  top_5 <- uniq_indels_sorted[1:num_top,]
+
+  top5_names <- lapply(c(1:dim(top_5)[1]),
+                       function(i){
+                         if( top_5[i,]$Modification == "del" ) {
+                           return(paste0(top_5[i,]$Start, "_", top_5[i,]$Modification, top_5[i,]$Length, "_", top_5[i,]$pattern))
+                         } else if ( top_5[i,]$Modification == "ins" ) {
+                           return(paste0(top_5[i,]$Start, "_", top_5[i,]$Modification, top_5[i,]$ins_nt))
+                         } else {
+                           return(top_5[i,]$Modification)
+                         }
+                       }
+  )
+
+  wt_pos <- which(unlist(top5_names) == "wt")
+  if (length(wt_pos) == 0){
+    cols_list = c("#bebebe", rep("#9394f7", num_top + 1))
+  } else if (wt_pos == num_top){
+    cols_list = c("#bebebe", "#9394f7", rep("#9394f7", num_top - 1), "#1cf453")
+  } else if (wt_pos == 1) {
+    cols_list = c("#bebebe", "#9394f7", "#1cf453", rep("#9394f7", num_top - 1))
+  } else {
+    cols_list = c("#bebebe", rep("#9394f7", wt_pos), "#1cf453", rep("#9394f7", num_top-wt_pos))
+  }
+
+  reads_classes <- c("Other alleles", "Top alleles", unlist(top5_names))
+  reads_counts <- c(total_char - sum(as.numeric(top_5$freq)), sum(as.numeric(top_5$freq)), as.numeric(top_5$freq))
+  reads_summary <- data.frame(classes = unlist(reads_classes), counts = unlist(reads_counts))
+  reads_summary$parents =  c("", "", rep("Top alleles", num_top))
+  fig <- plot_ly(reads_summary,
+                 labels = ~ classes,
+                 parents = ~ parents,
+                 values = ~ counts,
+                 type = 'sunburst',
+                 branchvalues = 'total',
+                 textinfo = "label+percent entry",
+                 marker = list(colors = cols_list, color = "black"),
+                 textfont = list(color = '#000000', size = 20)
+  )
+
+  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_top.html"))
+
+  ### Logo plot
+  all_each_logo <- list()
+  list_num <- 1
+  sel_top <- top_5 %>% filter(Modification %in% c("del", "ins"))
+  if (dim(sel_top)[1] > 0){
+    for (i in c(1:dim(sel_top)[1])){
+      if (sel_top[i,]$Modification == "ins"){
+        selfil_1 <- data %>% filter(Modification ==  sel_top[i,]$Modification) %>% filter(Start ==  sel_top[i,]$Start) %>% filter(Length ==  sel_top[i,]$Length) %>%  filter(ins_nt ==  sel_top[i,]$ins_nt)
+      } else {
+        selfil_1 <- data %>% filter(Modification ==  sel_top[i,]$Modification) %>% filter(Start ==  sel_top[i,]$Start) %>% filter(Length ==  sel_top[i,]$Length)
+      }
+      s <- selfil_1[1,]$Start
+      l <- selfil_1[1,]$Length
+      if( s > 12  && (nchar(as.character(sread(ref_seq)[[1]])) > (s+l+9) )){
+        ref_splited <- c(str_split(toupper(as.character(sread(ref_seq)[[1]])), "")[[1]])
+        sel_ref <- ref_splited[(s-10):(s+l+9)]
+        mod <- selfil_1[1,]$Modification
+        if (mod == "ins"){
+          sel_ref <- c(sel_ref[1:10], c(str_split(toupper(selfil_1[1,]$ins_nt), "")[[1]]), sel_ref[11:length(sel_ref)])
+        }
+        p <- selfil_1[1,]$patterns
+        each_logo <- get_logo_top_vars(sel_ref, l, p, mod, s, cut_site)
+        
+        all_each_logo[[list_num]] <- each_logo
+        list_num <- list_num + 1
+      }
+    }
+  }
+
+  if (length(all_each_logo) > 0){
+    plot_grid(plotlist =  all_each_logo, ncol = 1)
+    ggsave(paste0(sample_name, "_top-alleles_LOGO.png"))
+  } else {
+    system(paste0("touch ",	sample_name, "_top-alleles_LOGO.png"))
+  }
+
+} else {
+  fig<-empty_plot("No alignments were produced.
+  Please check your files and references")
+  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Deletions.html"))
+  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Insertions.html"))
+  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_accumulative.html"))
+  # system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
+
+  #Elements for dynamic table IF THERE ARE NO ALIGNMENTS
+  empty_list = list()
+  exportJson <- jsonlite::toJSON(empty_list)
+  write(exportJson, paste(sample_name,"_length.json",sep=""))
+  write(exportJson, paste(sample_name,"_Total_reads.json",sep=""))
+  write(exportJson, paste(sample_name,"_metadels.json",sep=""))
+  write(exportJson, paste(sample_name,"_metains.json",sep=""))
+
+  system(paste0("touch ",	sample_name, "_top.html"))
+  system(paste0("touch ",	sample_name, "_top-alleles_LOGO.png"))
+  system(paste0("touch ",	sample_name, "_counts_plot.png"))
+  system(paste0("touch ",	sample_name, "_subs-perc_plot_LOGO.png"))
+  system(paste0("touch ",	sample_name, "_subs-perc_plot.png"))
+}
diff --git a/conf/modules.config b/conf/modules.config
index f78266e9..8c6b5d6c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -231,6 +231,10 @@ process {
         ext.args = '--cut_site=-3'
     }
 
+    withName: CRISPRSEQ_PLOTTER {
+        ext.args = '--cut_site=-3'
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
diff --git a/modules/local/cigar_parser.nf b/modules/local/cigar_parser.nf
index 5073ab26..5b87bba4 100644
--- a/modules/local/cigar_parser.nf
+++ b/modules/local/cigar_parser.nf
@@ -11,7 +11,7 @@ process CIGAR_PARSER {
     tuple val(meta), path(reads), path(index), path(reference), val(protospacer), path(template), path(template_bam), path(reference_template), path(summary)
 
     output:
-    tuple val(meta), path("*indels.csv"), path("*_subs-perc.csv"), emit: indels
+    tuple val(meta), path("*[!QC-]indels.csv"), path("*_subs-perc.csv"), emit: indels
     tuple val(meta), path("*.html"), path("*edits.csv")          , emit: edition
     tuple val(meta), path("*cutSite.json")                       , emit: cutsite
     tuple val(meta), path("*_QC-indels.csv")                     , emit: qcindels
diff --git a/modules/local/crisprseq_plotter.nf b/modules/local/crisprseq_plotter.nf
new file mode 100644
index 00000000..30a5db68
--- /dev/null
+++ b/modules/local/crisprseq_plotter.nf
@@ -0,0 +1,42 @@
+process CRISPRSEQ_PLOTTER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda 'r-ggplot2=3.4.3 bioconductor-shortread=1.58.0 r-ggpubr=0.6.0 r-ggmsa=1.0.2 r-seqmagick=0.1.6 r-tidyr=1.3.0 r-ggseqlogo=0.1 r-cowplot=1.1.1 r-seqinr=4.2_30 r-optparse=1.7.3 r-dplyr=1.1.2 r-plyr=1.8.8 r-stringr=1.5.0 r-plotly=4.10.2'
+    // TODO: change containers
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-61c59287265e27f2c4589cfc90013ef6c2c6acf1:fb3e48060a8c0e5108b1b60a2ad7e090cfb9eee5-0' :
+        'biocontainers/mulled-v2-61c59287265e27f2c4589cfc90013ef6c2c6acf1:fb3e48060a8c0e5108b1b60a2ad7e090cfb9eee5-0' }"
+
+    input:
+    tuple val(meta), path(indels), path(substitutions), path(reference), val(protospacer)
+
+    output:
+    tuple val(meta), path("*_Deletions.html"), path("*_delAlleles_plot.png")        , emit: deletions
+    tuple val(meta), path("*_Insertions.html")                                      , emit: insertions
+    tuple val(meta), path("*_accumulative.html")                                    , emit: accumulative
+    tuple val(meta), path("*_subs-perc_plot.png"), path("*_subs-perc_plot_LOGO.png"), emit: substitutions
+    tuple val(meta), path("*_top-alleles_LOGO.png"), path("*_top.html")             , emit: topalleles
+    path "versions.yml"                                                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    plotter.R \\
+        $args \\
+        --indels_info=$indels \\
+        --reference=$reference \\
+        --gRNA_sequence=$protospacer \\
+        --sample_name=$prefix \\
+        --substitutions_info=$substitutions
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Rscript: \$(Rscript --version)
+    END_VERSIONS
+    """
+}
diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf
index 15d870a0..33c2ee59 100644
--- a/workflows/crisprseq_targeted.nf
+++ b/workflows/crisprseq_targeted.nf
@@ -46,6 +46,7 @@ include { PREPROCESSING_SUMMARY } from '../modules/local/preprocessing_summary'
 include { CLUSTERING_SUMMARY    } from '../modules/local/clustering_summary'
 include { ALIGNMENT_SUMMARY     } from '../modules/local/alignment_summary'
 include { TEMPLATE_REFERENCE    } from '../modules/local/template_reference'
+include { CRISPRSEQ_PLOTTER     } from '../modules/local/crisprseq_plotter'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -706,12 +707,7 @@ workflow CRISPRSEQ_TARGETED {
     ch_mapped_bam
         .join(SAMTOOLS_INDEX.out.bai)
         .join(ORIENT_REFERENCE.out.reference)
-        .join(ch_input.protospacer
-            .map {
-                meta, fastq ->
-                    [ meta , fastq ]
-            }
-        )
+        .join(ch_input.protospacer)
         .join(ch_seq_template, remainder: true)
         .join(ch_template_bam, remainder: true)
         .join(TEMPLATE_REFERENCE.out.fasta, remainder: true)
@@ -740,6 +736,17 @@ workflow CRISPRSEQ_TARGETED {
     ch_versions = ch_versions.mix(CIGAR_PARSER.out.versions.first())
 
 
+    //
+    //
+    //
+    CRISPRSEQ_PLOTTER (
+        CIGAR_PARSER.out.indels
+        .join(ORIENT_REFERENCE.out.reference)
+        .join(ch_input.protospacer)
+    )
+    ch_versions = ch_versions.mix(CRISPRSEQ_PLOTTER.out.versions.first())
+
+
     //
     // MODULE: Dump software versions
     //

From c546e5403aae0891ac61428a0ee2e8a35fe76fc4 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Fri, 18 Aug 2023 17:29:46 +0200
Subject: [PATCH 02/10] save plot results in a folder called plots

---
 conf/modules.config | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 8c6b5d6c..b282aebc 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -233,6 +233,11 @@ process {
 
     withName: CRISPRSEQ_PLOTTER {
         ext.args = '--cut_site=-3'
+        publishDir = [
+            path: { "${params.outdir}/plots" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
     }
 
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {

From 7ea4c31759af1c2deb1caa21c0e00a0e6b6bdce2 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Mon, 21 Aug 2023 14:46:17 +0200
Subject: [PATCH 03/10] print plotted percentage in substitution percentage
 plot

---
 bin/plotter.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/plotter.R b/bin/plotter.R
index 10ab023d..883b2105 100755
--- a/bin/plotter.R
+++ b/bin/plotter.R
@@ -400,7 +400,7 @@ subs_plot <- function(subsperc, gRNA_seq, cut_site){
     plot <- ggplot(data=subsperc %>% filter(pos > pre_cut_site - 25) %>% filter(pos < post_cut_site + 25), aes(x=ordered(pos), y=percentage, fill=nucleotide, alpha=ifelse(percentage<95,1,0))) +
         geom_bar(stat="identity") + theme_classic() + scale_fill_manual("Nuclotides", values = c("A" = "#109648", "C" = "#86b7ed", "G" = "#f7b32b", "T" = "#d62839", "-" = "#f2f2f2")) +
         scale_x_discrete(breaks = (pre_cut_site+1):(post_cut_site-1), labels = ref_nt) + xlab(NULL) + scale_alpha(guide = 'none') + ylab('nt (%)') +
-        geom_text(aes(label = ifelse(percentage<50 & percentage>5,percentage,"")),
+        geom_text(aes(label = ifelse(percentage<50 & percentage>5,100-percentage,"")),
                 hjust = 0, vjust = 1.5, angle = 90, nudge_x = -.5,
                 size = 2.5) +
         theme(text = element_text(size = 11), legend.position = "bottom")
@@ -627,7 +627,7 @@ if (dim(data)[2]>3 && length(checkFaulty) == 0 && length(checkEmpty) == 0){ ###
         }
         p <- selfil_1[1,]$patterns
         each_logo <- get_logo_top_vars(sel_ref, l, p, mod, s, cut_site)
-        
+
         all_each_logo[[list_num]] <- each_logo
         list_num <- list_num + 1
       }

From 4acfc21892511cda60db6a8a5c358dc10f317465 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Tue, 22 Aug 2023 08:52:30 +0200
Subject: [PATCH 04/10] add mulled container for crisprseq_plotter

---
 modules/local/crisprseq_plotter.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/crisprseq_plotter.nf b/modules/local/crisprseq_plotter.nf
index 30a5db68..2a368f6c 100644
--- a/modules/local/crisprseq_plotter.nf
+++ b/modules/local/crisprseq_plotter.nf
@@ -5,8 +5,8 @@ process CRISPRSEQ_PLOTTER {
     conda 'r-ggplot2=3.4.3 bioconductor-shortread=1.58.0 r-ggpubr=0.6.0 r-ggmsa=1.0.2 r-seqmagick=0.1.6 r-tidyr=1.3.0 r-ggseqlogo=0.1 r-cowplot=1.1.1 r-seqinr=4.2_30 r-optparse=1.7.3 r-dplyr=1.1.2 r-plyr=1.8.8 r-stringr=1.5.0 r-plotly=4.10.2'
     // TODO: change containers
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-61c59287265e27f2c4589cfc90013ef6c2c6acf1:fb3e48060a8c0e5108b1b60a2ad7e090cfb9eee5-0' :
-        'biocontainers/mulled-v2-61c59287265e27f2c4589cfc90013ef6c2c6acf1:fb3e48060a8c0e5108b1b60a2ad7e090cfb9eee5-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' :
+        'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }"
 
     input:
     tuple val(meta), path(indels), path(substitutions), path(reference), val(protospacer)

From a6cfae3ebde3b9f1d3e771664683da76a6bf8b67 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Tue, 22 Aug 2023 09:19:37 +0200
Subject: [PATCH 05/10] fix left-padding spaces

---
 bin/plotter.R | 220 +++++++++++++++++++++++++-------------------------
 1 file changed, 110 insertions(+), 110 deletions(-)

diff --git a/bin/plotter.R b/bin/plotter.R
index 883b2105..33a008f2 100755
--- a/bin/plotter.R
+++ b/bin/plotter.R
@@ -547,119 +547,119 @@ if (dim(data)[2]>3 && length(checkFaulty) == 0 && length(checkEmpty) == 0){ ###
         system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
     }
 
-  ######### Low variability or top variants plot
-  wt <- data$wt_reads[1] ### 7299
-  templata_based <- data$t_reads[1] ### 0
-  total_char <- wt + templata_based + dim(data)[1]
-
-  delCols_indels <- data %>% group_by(Modification, Start, Length, ins_nt, patterns) %>% dplyr::summarize(freq = n())
-  unique_variants <- rbind(as.data.frame(delCols_indels), c("wt", 0, 0, NA, NA, wt), c("template-based", 0, 0, NA, NA, templata_based))
-  uniq_indels_sorted <- unique_variants[order(as.numeric(unique_variants$freq), decreasing = TRUE),]
-  write.csv(uniq_indels_sorted,file=paste0(sample_name, "_unique-variants.csv"))
-
-  # Check if there are enogh indels to have 5 top
-  if(dim(uniq_indels_sorted)[1] < 5 ){
-    num_top <- dim(delCols_indels)[1]
-  } else { num_top <- 5 }
-
-  # Get to variants
-  top_5 <- uniq_indels_sorted[1:num_top,]
-
-  top5_names <- lapply(c(1:dim(top_5)[1]),
-                       function(i){
-                         if( top_5[i,]$Modification == "del" ) {
-                           return(paste0(top_5[i,]$Start, "_", top_5[i,]$Modification, top_5[i,]$Length, "_", top_5[i,]$pattern))
-                         } else if ( top_5[i,]$Modification == "ins" ) {
-                           return(paste0(top_5[i,]$Start, "_", top_5[i,]$Modification, top_5[i,]$ins_nt))
-                         } else {
-                           return(top_5[i,]$Modification)
-                         }
-                       }
-  )
-
-  wt_pos <- which(unlist(top5_names) == "wt")
-  if (length(wt_pos) == 0){
-    cols_list = c("#bebebe", rep("#9394f7", num_top + 1))
-  } else if (wt_pos == num_top){
-    cols_list = c("#bebebe", "#9394f7", rep("#9394f7", num_top - 1), "#1cf453")
-  } else if (wt_pos == 1) {
-    cols_list = c("#bebebe", "#9394f7", "#1cf453", rep("#9394f7", num_top - 1))
-  } else {
-    cols_list = c("#bebebe", rep("#9394f7", wt_pos), "#1cf453", rep("#9394f7", num_top-wt_pos))
-  }
-
-  reads_classes <- c("Other alleles", "Top alleles", unlist(top5_names))
-  reads_counts <- c(total_char - sum(as.numeric(top_5$freq)), sum(as.numeric(top_5$freq)), as.numeric(top_5$freq))
-  reads_summary <- data.frame(classes = unlist(reads_classes), counts = unlist(reads_counts))
-  reads_summary$parents =  c("", "", rep("Top alleles", num_top))
-  fig <- plot_ly(reads_summary,
-                 labels = ~ classes,
-                 parents = ~ parents,
-                 values = ~ counts,
-                 type = 'sunburst',
-                 branchvalues = 'total',
-                 textinfo = "label+percent entry",
-                 marker = list(colors = cols_list, color = "black"),
-                 textfont = list(color = '#000000', size = 20)
-  )
-
-  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_top.html"))
-
-  ### Logo plot
-  all_each_logo <- list()
-  list_num <- 1
-  sel_top <- top_5 %>% filter(Modification %in% c("del", "ins"))
-  if (dim(sel_top)[1] > 0){
-    for (i in c(1:dim(sel_top)[1])){
-      if (sel_top[i,]$Modification == "ins"){
-        selfil_1 <- data %>% filter(Modification ==  sel_top[i,]$Modification) %>% filter(Start ==  sel_top[i,]$Start) %>% filter(Length ==  sel_top[i,]$Length) %>%  filter(ins_nt ==  sel_top[i,]$ins_nt)
-      } else {
-        selfil_1 <- data %>% filter(Modification ==  sel_top[i,]$Modification) %>% filter(Start ==  sel_top[i,]$Start) %>% filter(Length ==  sel_top[i,]$Length)
-      }
-      s <- selfil_1[1,]$Start
-      l <- selfil_1[1,]$Length
-      if( s > 12  && (nchar(as.character(sread(ref_seq)[[1]])) > (s+l+9) )){
-        ref_splited <- c(str_split(toupper(as.character(sread(ref_seq)[[1]])), "")[[1]])
-        sel_ref <- ref_splited[(s-10):(s+l+9)]
-        mod <- selfil_1[1,]$Modification
-        if (mod == "ins"){
-          sel_ref <- c(sel_ref[1:10], c(str_split(toupper(selfil_1[1,]$ins_nt), "")[[1]]), sel_ref[11:length(sel_ref)])
-        }
-        p <- selfil_1[1,]$patterns
-        each_logo <- get_logo_top_vars(sel_ref, l, p, mod, s, cut_site)
+    ######### Low variability or top variants plot
+    wt <- data$wt_reads[1] ### 7299
+    templata_based <- data$t_reads[1] ### 0
+    total_char <- wt + templata_based + dim(data)[1]
+
+    delCols_indels <- data %>% group_by(Modification, Start, Length, ins_nt, patterns) %>% dplyr::summarize(freq = n())
+    unique_variants <- rbind(as.data.frame(delCols_indels), c("wt", 0, 0, NA, NA, wt), c("template-based", 0, 0, NA, NA, templata_based))
+    uniq_indels_sorted <- unique_variants[order(as.numeric(unique_variants$freq), decreasing = TRUE),]
+    write.csv(uniq_indels_sorted,file=paste0(sample_name, "_unique-variants.csv"))
+
+    # Check if there are enogh indels to have 5 top
+    if(dim(uniq_indels_sorted)[1] < 5 ){
+        num_top <- dim(delCols_indels)[1]
+    } else { num_top <- 5 }
+
+    # Get to variants
+    top_5 <- uniq_indels_sorted[1:num_top,]
+
+    top5_names <- lapply(c(1:dim(top_5)[1]),
+                        function(i){
+                            if( top_5[i,]$Modification == "del" ) {
+                                return(paste0(top_5[i,]$Start, "_", top_5[i,]$Modification, top_5[i,]$Length, "_", top_5[i,]$pattern))
+                            } else if ( top_5[i,]$Modification == "ins" ) {
+                                return(paste0(top_5[i,]$Start, "_", top_5[i,]$Modification, top_5[i,]$ins_nt))
+                            } else {
+                                return(top_5[i,]$Modification)
+                            }
+                        }
+    )
+
+    wt_pos <- which(unlist(top5_names) == "wt")
+    if (length(wt_pos) == 0){
+        cols_list = c("#bebebe", rep("#9394f7", num_top + 1))
+    } else if (wt_pos == num_top){
+        cols_list = c("#bebebe", "#9394f7", rep("#9394f7", num_top - 1), "#1cf453")
+    } else if (wt_pos == 1) {
+        cols_list = c("#bebebe", "#9394f7", "#1cf453", rep("#9394f7", num_top - 1))
+    } else {
+        cols_list = c("#bebebe", rep("#9394f7", wt_pos), "#1cf453", rep("#9394f7", num_top-wt_pos))
+    }
 
-        all_each_logo[[list_num]] <- each_logo
-        list_num <- list_num + 1
-      }
+    reads_classes <- c("Other alleles", "Top alleles", unlist(top5_names))
+    reads_counts <- c(total_char - sum(as.numeric(top_5$freq)), sum(as.numeric(top_5$freq)), as.numeric(top_5$freq))
+    reads_summary <- data.frame(classes = unlist(reads_classes), counts = unlist(reads_counts))
+    reads_summary$parents =  c("", "", rep("Top alleles", num_top))
+    fig <- plot_ly(reads_summary,
+                    labels = ~ classes,
+                    parents = ~ parents,
+                    values = ~ counts,
+                    type = 'sunburst',
+                    branchvalues = 'total',
+                    textinfo = "label+percent entry",
+                    marker = list(colors = cols_list, color = "black"),
+                    textfont = list(color = '#000000', size = 20)
+    )
+
+    htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_top.html"))
+
+    ### Logo plot
+    all_each_logo <- list()
+    list_num <- 1
+    sel_top <- top_5 %>% filter(Modification %in% c("del", "ins"))
+    if (dim(sel_top)[1] > 0){
+        for (i in c(1:dim(sel_top)[1])){
+            if (sel_top[i,]$Modification == "ins"){
+                selfil_1 <- data %>% filter(Modification ==  sel_top[i,]$Modification) %>% filter(Start ==  sel_top[i,]$Start) %>% filter(Length ==  sel_top[i,]$Length) %>%  filter(ins_nt ==  sel_top[i,]$ins_nt)
+            } else {
+                selfil_1 <- data %>% filter(Modification ==  sel_top[i,]$Modification) %>% filter(Start ==  sel_top[i,]$Start) %>% filter(Length ==  sel_top[i,]$Length)
+            }
+            s <- selfil_1[1,]$Start
+            l <- selfil_1[1,]$Length
+            if( s > 12  && (nchar(as.character(sread(ref_seq)[[1]])) > (s+l+9) )){
+                ref_splited <- c(str_split(toupper(as.character(sread(ref_seq)[[1]])), "")[[1]])
+                sel_ref <- ref_splited[(s-10):(s+l+9)]
+                mod <- selfil_1[1,]$Modification
+                if (mod == "ins"){
+                    sel_ref <- c(sel_ref[1:10], c(str_split(toupper(selfil_1[1,]$ins_nt), "")[[1]]), sel_ref[11:length(sel_ref)])
+                }
+                p <- selfil_1[1,]$patterns
+                each_logo <- get_logo_top_vars(sel_ref, l, p, mod, s, cut_site)
+
+                all_each_logo[[list_num]] <- each_logo
+                list_num <- list_num + 1
+            }
+        }
     }
-  }
 
-  if (length(all_each_logo) > 0){
-    plot_grid(plotlist =  all_each_logo, ncol = 1)
-    ggsave(paste0(sample_name, "_top-alleles_LOGO.png"))
-  } else {
-    system(paste0("touch ",	sample_name, "_top-alleles_LOGO.png"))
-  }
+    if (length(all_each_logo) > 0){
+        plot_grid(plotlist =  all_each_logo, ncol = 1)
+        ggsave(paste0(sample_name, "_top-alleles_LOGO.png"))
+    } else {
+        system(paste0("touch ",	sample_name, "_top-alleles_LOGO.png"))
+    }
 
 } else {
-  fig<-empty_plot("No alignments were produced.
-  Please check your files and references")
-  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Deletions.html"))
-  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Insertions.html"))
-  htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_accumulative.html"))
-  # system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
-
-  #Elements for dynamic table IF THERE ARE NO ALIGNMENTS
-  empty_list = list()
-  exportJson <- jsonlite::toJSON(empty_list)
-  write(exportJson, paste(sample_name,"_length.json",sep=""))
-  write(exportJson, paste(sample_name,"_Total_reads.json",sep=""))
-  write(exportJson, paste(sample_name,"_metadels.json",sep=""))
-  write(exportJson, paste(sample_name,"_metains.json",sep=""))
-
-  system(paste0("touch ",	sample_name, "_top.html"))
-  system(paste0("touch ",	sample_name, "_top-alleles_LOGO.png"))
-  system(paste0("touch ",	sample_name, "_counts_plot.png"))
-  system(paste0("touch ",	sample_name, "_subs-perc_plot_LOGO.png"))
-  system(paste0("touch ",	sample_name, "_subs-perc_plot.png"))
+    fig<-empty_plot("No alignments were produced.
+    Please check your files and references")
+    htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Deletions.html"))
+    htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Insertions.html"))
+    htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_accumulative.html"))
+    # system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
+
+    #Elements for dynamic table IF THERE ARE NO ALIGNMENTS
+    empty_list = list()
+    exportJson <- jsonlite::toJSON(empty_list)
+    write(exportJson, paste(sample_name,"_length.json",sep=""))
+    write(exportJson, paste(sample_name,"_Total_reads.json",sep=""))
+    write(exportJson, paste(sample_name,"_metadels.json",sep=""))
+    write(exportJson, paste(sample_name,"_metains.json",sep=""))
+
+    system(paste0("touch ",	sample_name, "_top.html"))
+    system(paste0("touch ",	sample_name, "_top-alleles_LOGO.png"))
+    system(paste0("touch ",	sample_name, "_counts_plot.png"))
+    system(paste0("touch ",	sample_name, "_subs-perc_plot_LOGO.png"))
+    system(paste0("touch ",	sample_name, "_subs-perc_plot.png"))
 }

From 25141457a6e1c29e8a1ffe98ae1f7ac3045e8a18 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Tue, 22 Aug 2023 10:30:59 +0200
Subject: [PATCH 06/10] output empty *_delAlleles_plot.png image when required

---
 bin/plotter.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/plotter.R b/bin/plotter.R
index 33a008f2..0982f5e8 100755
--- a/bin/plotter.R
+++ b/bin/plotter.R
@@ -647,7 +647,7 @@ if (dim(data)[2]>3 && length(checkFaulty) == 0 && length(checkEmpty) == 0){ ###
     htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Deletions.html"))
     htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_Insertions.html"))
     htmlwidgets::saveWidget(as_widget(fig), paste0(sample_name,"_accumulative.html"))
-    # system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
+    system(paste0("touch ",	sample_name, "_delAlleles_plot.png"))
 
     #Elements for dynamic table IF THERE ARE NO ALIGNMENTS
     empty_list = list()

From 08334c0aaaa895494a4317c38afe6a1c016a7ec9 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Tue, 22 Aug 2023 11:23:10 +0200
Subject: [PATCH 07/10] output empty *_subs-perc_plot_LOGO.png image when
 required

---
 bin/plotter.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/plotter.R b/bin/plotter.R
index 0982f5e8..fb540e7f 100755
--- a/bin/plotter.R
+++ b/bin/plotter.R
@@ -510,6 +510,7 @@ if (dim(data)[2]>3 && length(checkFaulty) == 0 && length(checkEmpty) == 0){ ###
     cut_site <- get_cutSite(gR, ref_seq, rel_cut_site)
     if (dim(subs_plup)[1] == 0){
         system(paste0("touch ", sample_name, "_subs-perc_plot.png"))
+        system(paste0("touch ", sample_name, "_subs-perc_plot_LOGO.png"))
     } else {
         sp <- subs_plot(subs_plup, gR, cut_site)
         ggsave(paste0(sample_name, "_subs-perc_plot.png"), width = 12, height = 1.5)

From 69ddf24e085ebe4cac46704df2b2865e087de1ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BAlia=20Mir=20Pedrol?= <mirp.julia@gmail.com>
Date: Thu, 28 Sep 2023 11:33:11 +0200
Subject: [PATCH 08/10] Apply suggestions from code review

Co-authored-by: WackerO <43847497+WackerO@users.noreply.github.com>
---
 bin/plotter.R                      | 6 +++---
 modules/local/crisprseq_plotter.nf | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bin/plotter.R b/bin/plotter.R
index fb540e7f..8d0fd8ea 100755
--- a/bin/plotter.R
+++ b/bin/plotter.R
@@ -392,7 +392,7 @@ get_sequences <- function(indels, ref_seq){
 ######################
 subs_plot <- function(subsperc, gRNA_seq, cut_site){
     ### Get the gRNA region
-    pre_cut_site <- cut_site - (nchar(gRNA_seq)-3)  ## insted of 17 to allow different gRNA lengths instead of only 20
+    pre_cut_site <- cut_site - (nchar(gRNA_seq)-3)  ## instead of 17 to allow different gRNA lengths instead of only 20
     post_cut_site <- cut_site + 3 + 1
     ### gRNA nucleotides to use them in the axis
     ref_nt <- stringr::str_split(gRNA_seq, "")[[1]]
@@ -421,7 +421,7 @@ subs_logo_plot <- function(subsperc, gRNA_seq, cut_site){
     nuc_pos[is.na(nuc_pos)] <- 0
     nuc_pos[nuc_pos == 100] <- 0
     nuc_pos_matrix <- data.matrix(nuc_pos[,-1])
-    nuc_pos_matrix <- nuc_pos_matrix+0.00001 ### This is done to aboid problemes in case of a matrix full of 0s
+    nuc_pos_matrix <- nuc_pos_matrix+0.00001 ### This is done to avoid problems in case of a matrix full of 0s
     rownames(nuc_pos_matrix) <- nuc_pos$nucleotide
     #
     ### gRNA nucleotides to use them in the axis
@@ -438,7 +438,7 @@ subs_logo_plot <- function(subsperc, gRNA_seq, cut_site){
 
 ### Logo plot of top indels
 get_logo_top_vars <- function(selected_reference, change_len, pattern_indel, modification, str_pos, cut_site){
-    # This function is used to generate the logo of the alleles that apperar most times in data.
+    # This function is used to generate the logo of the alleles that appear most times in data.
     all_pre_nt <- list(A = "", C = "", T = "", G = "")
     for (j in c("A", "C", "G", "T")){
         per_nt <- lapply(1:length(selected_reference), {
diff --git a/modules/local/crisprseq_plotter.nf b/modules/local/crisprseq_plotter.nf
index 2a368f6c..5612260e 100644
--- a/modules/local/crisprseq_plotter.nf
+++ b/modules/local/crisprseq_plotter.nf
@@ -3,7 +3,6 @@ process CRISPRSEQ_PLOTTER {
     label 'process_medium'
 
     conda 'r-ggplot2=3.4.3 bioconductor-shortread=1.58.0 r-ggpubr=0.6.0 r-ggmsa=1.0.2 r-seqmagick=0.1.6 r-tidyr=1.3.0 r-ggseqlogo=0.1 r-cowplot=1.1.1 r-seqinr=4.2_30 r-optparse=1.7.3 r-dplyr=1.1.2 r-plyr=1.8.8 r-stringr=1.5.0 r-plotly=4.10.2'
-    // TODO: change containers
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' :
         'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }"

From 79e8269a79fb98585e35b4f59f361d47390cfd4c Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Thu, 28 Sep 2023 12:51:42 +0200
Subject: [PATCH 09/10] fix multiqc_config.yml

---
 assets/multiqc_config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 331a476f..4c011c9b 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,5 +1,5 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/crisprseq" target="_blank">nf-core/crisprseq</a>
+  This report has been generated by the <a href="https://github.com/nf-core/crisprseq/releases/Tag/2.0.0" target="_blank">nf-core/crisprseq</a>
   analysis pipeline. For information about how to interpret these results, please see the
   <a href="https://nf-co.re/crisprseq/2.0.0/docs/output" target="_blank">documentation</a>.
 report_section_order:

From 10c7c7471d5cd62caca03bcf598a400eb0ae73a7 Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Thu, 28 Sep 2023 13:13:06 +0200
Subject: [PATCH 10/10] fix multiqc_config.yml linting test

---
 assets/multiqc_config.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 4c011c9b..368215a8 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/crisprseq/releases/Tag/2.0.0" target="_blank">nf-core/crisprseq</a>
+  This report has been generated by the <a href="https://github.com/nf-core/crisprseq/releases/tag/dev" target="_blank">nf-core/crisprseq</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/crisprseq/2.0.0/docs/output" target="_blank">documentation</a>.
+  <a href="https://nf-co.re/crisprseq/dev/docs/output" target="_blank">documentation</a>.
 report_section_order:
   "nf-core-crisprseq-methods-description":
     order: -1000