diff --git a/mgatk/bin/R/convert_cellbender_output.R b/mgatk/bin/R/convert_cellbender_output.R deleted file mode 100644 index 728dc6d..0000000 --- a/mgatk/bin/R/convert_cellbender_output.R +++ /dev/null @@ -1,264 +0,0 @@ -library(dplyr) -library(Matrix) -library(tidyr) -library(optparse) -library(SummarizedExperiment) - -option_list <- list( - make_option(c("-i","--input"),type="character",help="input MGATK directory",metavar="file"), - make_option(c("-c","--cellbender_h5"),type="character",help="cellbender output file (all cells!)",metavar="file"), - make_option(c("-o","--output"),type="character",help="output file with filtered counts",metavar="file"), - make_option("--cells",type="character",help="barcodes.tsv file with accepted cell barcodes",metavar="file") -) - -opt_parser <- OptionParser(option_list = option_list) -options <- parse_args(opt_parser) - -if (is.null(options$input)) { - print_help(opt_parser) - stop("please supply input directory",call.=FALSE) -} - -if (is.null(options$output)) { - print_help(opt_parser) - stop("please supply output directory",call.=FALSE) -} - -if (is.null(options$cellbender_h5)) { - print_help(opt_parser) - stop("please supply output directory",call.=FALSE) -} - -ReadCB_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) { - if (!requireNamespace('hdf5r', quietly = TRUE)) { - stop("Please install hdf5r to read HDF5 files") - } - if (!file.exists(filename)) { - stop("File not found") - } - infile <- hdf5r::H5File$new(filename = filename, mode = 'r') - genomes <- names(x = infile) - output <- list() - if (hdf5r::existsGroup(infile, 'matrix')) { - # cellranger version 3 - if (use.names) { - feature_slot <- 'features/name' - } else { - feature_slot <- 'features/id' - } - } else { - if (use.names) { - feature_slot <- 'gene_names' - } else { - feature_slot <- 'genes' - } - } - for (genome in genomes) { - counts <- infile[[paste0(genome, '/data')]] - indices <- infile[[paste0(genome, '/indices')]] - indptr <- infile[[paste0(genome, '/indptr')]] - shp <- infile[[paste0(genome, '/shape')]] - features <- infile[[paste0(genome, '/', feature_slot)]][] - barcodes <- infile[[paste0(genome, '/barcodes')]] - sparse.mat <- sparseMatrix( - i = indices[] + 1, - p = indptr[], - x = as.numeric(x = counts[]), - dims = shp[], - giveCsparse = FALSE - ) - if (unique.features) { - features <- make.unique(names = features) - } - rownames(x = sparse.mat) <- features - colnames(x = sparse.mat) <- barcodes[] - sparse.mat <- as(object = sparse.mat, Class = 'dgCMatrix') - # Split v3 multimodal - if (infile$exists(name = paste0(genome, '/features'))) { - types <- infile[[paste0(genome, '/features/feature_type')]][] - types.unique <- unique(x = types) - if (length(x = types.unique) > 1) { - message("Genome ", genome, " has multiple modalities, returning a list of matrices for this genome") - sparse.mat <- sapply( - X = types.unique, - FUN = function(x) { - return(sparse.mat[which(x = types == x), ]) - }, - simplify = FALSE, - USE.NAMES = TRUE - ) - } - } - output[[genome]] <- sparse.mat - } - infile$close_all() - if (length(x = output) == 1) { - return(output[[genome]]) - } else{ - return(output) - } -} - -SparseMatrixFromBaseCounts <- function(basecounts, cells, dna.base, maxpos) { - - # Vector addition guarantee correct dimension - fwd.mat <- sparseMatrix( - i = c(basecounts$pos,maxpos), - j = c(cells[basecounts$cellbarcode],1), - x = c(basecounts$plus,0) - ) - colnames(x = fwd.mat) <- names(x = cells) - rownames(x = fwd.mat) <- paste( - dna.base, - seq_len(length.out = nrow(fwd.mat)), - "fwd", - sep = "-" - ) - rev.mat <- sparseMatrix( - i = c(basecounts$pos,maxpos), - j = c(cells[basecounts$cellbarcode],1), - x = c(basecounts$minus,0) - ) - colnames(x = rev.mat) <- names(x = cells) - rownames(x = rev.mat) <- paste( - dna.base, - seq_len(length.out = nrow(rev.mat)), - "rev", - sep = "-" - ) - return(list(fwd.mat, rev.mat)) -} - -ReadMGATK <- function(dir, verbose = TRUE) { - - if (!dir.exists(paths = dir)) { - stop("Directory not found") - } - a.path <- list.files(path = dir, pattern = "*.A.txt.gz", full.names = TRUE) - c.path <- list.files(path = dir, pattern = "*.C.txt.gz", full.names = TRUE) - t.path <- list.files(path = dir, pattern = "*.T.txt.gz", full.names = TRUE) - g.path <- list.files(path = dir, pattern = "*.G.txt.gz", full.names = TRUE) - - refallele.path <- list.files( - path = dir, - pattern = "*_refAllele.txt*", # valid mtDNA contigs include chrM and MT - full.names = TRUE - ) - - # The depth file lists all barcodes that were genotyped - depthfile.path <- list.files( - path = dir, - pattern = "*.depthTable.txt", - full.names = TRUE - ) - - if (verbose) { - message("Reading allele counts") - } - column.names <- c("pos", "cellbarcode", "plus", "minus") - a.counts <- read.table( - file = a.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - c.counts <- read.table( - file = c.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - t.counts <- read.table( - file = t.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - g.counts <- read.table( - file = g.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - - if (verbose) { - message("Reading metadata") - } - refallele <- read.table( - file = refallele.path, - header = FALSE, - stringsAsFactors = FALSE, - col.names = c("pos", "ref") - ) - refallele$ref <- toupper(x = refallele$ref) - depth <- read.table( - file = depthfile.path, - header = FALSE, - stringsAsFactors = FALSE, - col.names = c("cellbarcode", "mito.depth"), - row.names = 1 - ) - cellbarcodes <- unique(x = rownames(depth)) - cb.lookup <- seq_along(along.with = cellbarcodes) - names(cb.lookup) <- cellbarcodes - - if (verbose) { - message("Building matrices") - } - - maxpos <- dim(refallele)[1] - a.mat <- SparseMatrixFromBaseCounts( - basecounts = a.counts, cells = cb.lookup, dna.base = "A", maxpos = maxpos - ) - c.mat <- SparseMatrixFromBaseCounts( - basecounts = c.counts, cells = cb.lookup, dna.base = "C", maxpos = maxpos - ) - t.mat <- SparseMatrixFromBaseCounts( - basecounts = t.counts, cells = cb.lookup, dna.base = "T", maxpos = maxpos - ) - g.mat <- SparseMatrixFromBaseCounts( - basecounts = g.counts, cells = cb.lookup, dna.base = "G", maxpos = maxpos - ) - - counts <- rbind(a.mat[[1]], c.mat[[1]], t.mat[[1]], g.mat[[1]], - a.mat[[2]], c.mat[[2]], t.mat[[2]], g.mat[[2]]) - - return(list("counts" = counts, "depth" = depth, "refallele" = refallele)) -} - -raw <- ReadMGATK(file.path(options$input)) -rc <- raw$counts -CB <- ReadCB_h5(options$cellbender_h5) -pos <- intersect(row.names(rc),row.names(CB)) -cells <- intersect(colnames(rc),colnames(CB)) - -# if --cells is specified, restrict to those cells -if (!is.null(options$cells)) { - barcodes <- read.table(options$cells,header=FALSE,row.names=NULL) - cells <- intersect(cells,barcodes[,1]) -} - -rc <- rc[,cells] -# replace raw values by cellbender estimates -rc[pos,cells] <- CB[pos,cells] - -# at this point we probably should ensure that zero counts on one strand with non-zero counts on the other are actually zero and not NA; otherwise this triggers an error in Signac: https://github.com/timoast/signac/blob/2e4e18354297adc4b069b48a7794aa1efed35016/R/mito.R#L603 -assays <- list() -for (let in c('A','C','G','T')) { - assays[[paste0(let,'_counts_fw')]] <- rc[grepl(paste0(let,'-[0-9]*-fwd'),row.names(rc)),] - row.names(assays[[paste0(let,'_counts_fw')]]) <- raw$refallele$pos - assays[[paste0(let,'_counts_rev')]] <- rc[grepl(paste0(let,'-[0-9]*-rev'),row.names(rc)),] - row.names(assays[[paste0(let,'_counts_rev')]]) <- raw$refallele$pos -} -assays[['coverage']] <- Reduce('+', assays) -tmp <- SummarizedExperiment(assays=assays, - colData=data.frame(sample=colnames(rc), - depth=Matrix::colSums(rc)/(dim(rc)[1]/8)), - rowData=data.frame(refAllele=raw$refallele$ref, - row.names=raw$refallele$pos)) - -saveRDS(tmp, options$output) diff --git a/mgatk/bin/R/prepare_cellbender.R b/mgatk/bin/R/prepare_cellbender.R deleted file mode 100644 index 820d45c..0000000 --- a/mgatk/bin/R/prepare_cellbender.R +++ /dev/null @@ -1,368 +0,0 @@ -library(dplyr) -library(Matrix) -library(tidyr) -library(optparse) -library(data.table) - -option_list <- list( - make_option(c("-i","--input"),type="character",help="input MGATK directory",metavar="file"), - make_option(c("-o","--output"),type="character",help="output directory to be used as input for cellbender",metavar="file"), - make_option("--n_cells_conf_detected",type="integer",default=2,help="n_cells_conf_detected to select variable positions [%default]",metavar="integer"), - make_option("--strand_correlation",type="double",default=.5,help="strand_correlation to select variable positions [%default]",metavar="integer"), - make_option("--vmr",type="double",default=.001,help="vmr to select variable positions [%default]",metavar="integer") -) - -opt_parser <- OptionParser(option_list = option_list) -options <- parse_args(opt_parser) - -if (is.null(options$input)) { - print_help(opt_parser) - stop("please supply input directory",call.=FALSE) -} - -if (is.null(options$output)) { - print_help(opt_parser) - stop("please supply output directory",call.=FALSE) -} - -# copy-and-paste a few of Signac's functions for convenience - -SparseRowVar <- function(x) { - return(rowSums(x = (x - rowMeans(x = x)) ^ 2) / (dim(x = x)[2] - 1)) -} - -SparseMatrixFromBaseCounts <- function(basecounts, cells, dna.base, maxpos) { - - # Vector addition guarantee correct dimension - fwd.mat <- sparseMatrix( - i = c(basecounts$pos,maxpos), - j = c(cells[basecounts$cellbarcode],1), - x = c(basecounts$plus,0) - ) - colnames(x = fwd.mat) <- names(x = cells) - rownames(x = fwd.mat) <- paste( - dna.base, - seq_len(length.out = nrow(fwd.mat)), - "fwd", - sep = "-" - ) - rev.mat <- sparseMatrix( - i = c(basecounts$pos,maxpos), - j = c(cells[basecounts$cellbarcode],1), - x = c(basecounts$minus,0) - ) - colnames(x = rev.mat) <- names(x = cells) - rownames(x = rev.mat) <- paste( - dna.base, - seq_len(length.out = nrow(rev.mat)), - "rev", - sep = "-" - ) - return(list(fwd.mat, rev.mat)) -} - -ReadMGATK <- function(dir, verbose = TRUE) { - if (!dir.exists(paths = dir)) { - stop("Directory not found") - } - a.path <- list.files(path = dir, pattern = "*.A.txt.gz", full.names = TRUE) - c.path <- list.files(path = dir, pattern = "*.C.txt.gz", full.names = TRUE) - t.path <- list.files(path = dir, pattern = "*.T.txt.gz", full.names = TRUE) - g.path <- list.files(path = dir, pattern = "*.G.txt.gz", full.names = TRUE) - - refallele.path <- list.files( - path = dir, - pattern = "*_refAllele.txt*", # valid mtDNA contigs include chrM and MT - full.names = TRUE - ) - - # The depth file lists all barcodes that were genotyped - depthfile.path <- list.files( - path = dir, - pattern = "*.depthTable.txt", - full.names = TRUE - ) - - if (verbose) { - message("Reading allele counts") - } - column.names <- c("pos", "cellbarcode", "plus", "minus") - a.counts <- read.table( - file = a.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - c.counts <- read.table( - file = c.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - t.counts <- read.table( - file = t.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - g.counts <- read.table( - file = g.path, - sep = ",", - header = FALSE, - stringsAsFactors = FALSE, - col.names = column.names - ) - - if (verbose) { - message("Reading metadata") - } - refallele <- read.table( - file = refallele.path, - header = FALSE, - stringsAsFactors = FALSE, - col.names = c("pos", "ref") - ) - refallele$ref <- toupper(x = refallele$ref) - depth <- read.table( - file = depthfile.path, - header = FALSE, - stringsAsFactors = FALSE, - col.names = c("cellbarcode", "mito.depth"), - row.names = 1 - ) - cellbarcodes <- unique(x = rownames(depth)) - cb.lookup <- seq_along(along.with = cellbarcodes) - names(cb.lookup) <- cellbarcodes - - if (verbose) { - message("Building matrices") - } - - maxpos <- dim(refallele)[1] - a.mat <- SparseMatrixFromBaseCounts( - basecounts = a.counts, cells = cb.lookup, dna.base = "A", maxpos = maxpos - ) - c.mat <- SparseMatrixFromBaseCounts( - basecounts = c.counts, cells = cb.lookup, dna.base = "C", maxpos = maxpos - ) - t.mat <- SparseMatrixFromBaseCounts( - basecounts = t.counts, cells = cb.lookup, dna.base = "T", maxpos = maxpos - ) - g.mat <- SparseMatrixFromBaseCounts( - basecounts = g.counts, cells = cb.lookup, dna.base = "G", maxpos = maxpos - ) - - counts <- rbind(a.mat[[1]], c.mat[[1]], t.mat[[1]], g.mat[[1]], - a.mat[[2]], c.mat[[2]], t.mat[[2]], g.mat[[2]]) - - return(list("counts" = counts, "depth" = depth, "refallele" = refallele)) -} - -ProcessLetter <- function( - object, - letter, - ref_alleles, - coverage, - stabilize_variance = TRUE, - low_coverage_threshold = 10, - verbose = TRUE -) { - if (verbose) { - message("Processing ", letter) - } - boo <- ref_alleles$ref != letter & ref_alleles$ref != "N" - cov <- coverage[boo, ] - - variant_name <- paste0( - as.character(ref_alleles$pos), - ref_alleles$ref, - ">", - letter - )[boo] - - nucleotide <- paste0( - ref_alleles$ref, - ">", - letter - )[boo] - - position_filt <- ref_alleles$pos[boo] - - # get forward and reverse counts - fwd.counts <- GetMutationMatrix( - object = object, - letter = letter, - strand = "fwd" - )[boo, ] - - rev.counts <- GetMutationMatrix( - object = object, - letter = letter, - strand = "rev" - )[boo, ] - - # convert to row, column, value - fwd.ijx <- summary(fwd.counts) - rev.ijx <- summary(rev.counts) - - # get bulk coverage stats - bulk <- (rowSums(fwd.counts + rev.counts) / rowSums(cov)) - # replace NA or NaN with 0 - bulk[is.na(bulk)] <- 0 - bulk[is.nan(bulk)] <- 0 - - # find correlation between strands for cells with >0 counts on either strand - # group by variant (row) and find correlation between strand depth - both.strand <- data.table(cbind(fwd.ijx, rev.ijx$x)) - both.strand$i <- variant_name[both.strand$i] - colnames(both.strand) <- c("variant", "cell_idx", "forward", "reverse") - - cor_dt <- suppressWarnings(expr = both.strand[, .(cor = cor( - x = forward, y = reverse, method = "pearson", use = "pairwise.complete") - ), by = list(variant)]) - - # Put in vector for convenience - cor_vec_val <- cor_dt$cor - names(cor_vec_val) <- as.character(cor_dt$variant) - - # Compute the single-cell data - mat <- (fwd.counts + rev.counts) / cov - rownames(mat) <- variant_name - # set NAs and NaNs to zero - mat@x[!is.finite(mat@x)] <- 0 - - # Stablize variances by replacing low coverage cells with mean - if (stabilize_variance) { - idx_mat <- which(cov < low_coverage_threshold, arr.ind = TRUE) - idx_mat_mean <- bulk[idx_mat[, 1]] - ones <- 1 - sparseMatrix( - i = c(idx_mat[, 1], dim(x = mat)[1]), - j = c(idx_mat[, 2], dim(x = mat)[2]), - x = 1 - ) - means_mat <- sparseMatrix( - i = c(idx_mat[, 1], dim(x = mat)[1]), - j = c(idx_mat[, 2], dim(x = mat)[2]), - x = c(idx_mat_mean, 0) - ) - mmat2 <- mat * ones + means_mat - variance <- SparseRowVar(x = mmat2) - } else { - variance <- SparseRowVar(x = mat) - } - detected <- (fwd.counts >= 2) + (rev.counts >= 2) - - # Compute per-mutation summary statistics - var_summary_df <- data.frame( - position = position_filt, - nucleotide = nucleotide, - variant = variant_name, - vmr = variance / (bulk + 0.00000000001), - mean = round(x = bulk, digits = 7), - variance = round(x = variance, digits = 7), - n_cells_conf_detected = rowSums(x = detected == 2), - n_cells_over_5 = rowSums(x = mat >= 0.05), - n_cells_over_10 = rowSums(x = mat >= 0.10), - n_cells_over_20 = rowSums(x = mat >= 0.20), - strand_correlation = cor_vec_val[variant_name], - mean_coverage = rowMeans(x = cov), - stringsAsFactors = FALSE, - row.names = variant_name - ) - return(var_summary_df) -} - -GetMutationMatrix <- function(object, letter, strand) { - keep.rows <- paste( - letter, - seq_len(length.out = nrow(x = object) / 8), - strand, - sep = "-" - ) - return(object[keep.rows, ]) -} - -ComputeTotalCoverage <- function(object, verbose = TRUE) { - if (verbose) { - message("Computing total coverage per base") - } - rowstep <- nrow(x = object) / 8 - mat.list <- list() - for (i in seq_len(length.out = 8)) { - mat.list[[i]] <- object[(rowstep * (i - 1) + 1):(rowstep * i), ] - } - coverage <- Reduce(f = `+`, x = mat.list) - coverage <- as.matrix(x = coverage) - rownames(x = coverage) <- seq_along(along.with = rownames(x = coverage)) - return(coverage) -} - -IdentifyVariants <- function( - object, - refallele, - stabilize_variance = TRUE, - low_coverage_threshold = 10, - verbose = TRUE, - ... -) { - coverages <- ComputeTotalCoverage(object = object, verbose = verbose) - a.df <- ProcessLetter( - object = object, - letter = "A", - coverage = coverages, - ref_alleles = refallele, - stabilize_variance = stabilize_variance, - low_coverage_threshold = low_coverage_threshold, - verbose = verbose - ) - t.df <- ProcessLetter( - object = object, - letter = "T", - coverage = coverages, - ref_alleles = refallele, - stabilize_variance = stabilize_variance, - low_coverage_threshold = low_coverage_threshold, - verbose = verbose - ) - c.df <- ProcessLetter( - object = object, - letter = "C", - coverage = coverages, - ref_alleles = refallele, - stabilize_variance = stabilize_variance, - low_coverage_threshold = low_coverage_threshold, - verbose = verbose - ) - g.df <- ProcessLetter( - object = object, - letter = "G", - coverage = coverages, - ref_alleles = refallele, - stabilize_variance = stabilize_variance, - low_coverage_threshold = low_coverage_threshold, - verbose = verbose - ) - return(rbind(a.df, t.df, c.df, g.df)) -} - -tmp <- ReadMGATK(file.path(options$input)) -variable.sites <- IdentifyVariants(tmp$counts, refallele = tmp$refallele) -high.conf <- subset(variable.sites, subset = (n_cells_conf_detected >= options$n_cells_conf_detected & strand_correlation >= options$strand_correlation & vmr > options$vmr)) - -var.pos <- high.conf %>% - dplyr::mutate(pos1=gsub('([0-9]*)([ACGT])>([ACGT])','\\2-\\1-fwd',variant), - pos2=gsub('([0-9]*)([ACGT])>([ACGT])','\\3-\\1-fwd',variant), - pos3=gsub('([0-9]*)([ACGT])>([ACGT])','\\2-\\1-rev',variant), - pos4=gsub('([0-9]*)([ACGT])>([ACGT])','\\3-\\1-rev',variant)) %>% - dplyr::select(pos1,pos2,pos3,pos4) %>% - gather(p,pos) %>% - dplyr::pull(pos) - -write.table(var.pos,file.path(options$output,'genes.tsv'), - col.names=FALSE,row.names=FALSE,quote=FALSE) -writeMM(tmp$counts[var.pos,],file.path(options$output,'matrix.mtx')) -write.table(colnames(tmp$counts),file.path(options$output,'barcodes.tsv'), - row.names=FALSE,col.names=FALSE,quote=FALSE) diff --git a/mgatk/cli.py b/mgatk/cli.py index ee2ba9e..783cb27 100644 --- a/mgatk/cli.py +++ b/mgatk/cli.py @@ -64,8 +64,8 @@ @click.option('--skip-R', '-sr', is_flag=True, help='Generate plain-text only output. Otherwise, this generates a .rds obejct that can be immediately read into R for downstream analysis.') @click.option('--snake-stdout', '-so', is_flag=True, help='Write snakemake log to sdout rather than a file. May be necessary for certain HPC environments.') -@click.option('--ncells_fg', '-nfg', default = 1000, help='number of "foreground" cells to use for CellBender. Default = 1000.') -@click.option('--ncells_bg', '-nbg', default = 20000, help='number of "background" cells to use for CellBender. Default = 20000.') +@click.option('--ncells_fg', '-nfg', default = 1000, help='DEPRECATED: number of "foreground" cells to use for CellBender. Default = 1000.', hidden=True) +@click.option('--ncells_bg', '-nbg', default = 20000, help='DEPRECATED: number of "background" cells to use for CellBender. Default = 20000.', hidden=True) def main(mode, input, output, name, mito_genome, ncores, cluster, jobs, barcode_tag, barcodes, min_barcode_reads, @@ -76,7 +76,7 @@ def main(mode, input, output, name, mito_genome, ncores, """ mgatk: a mitochondrial genome analysis toolkit. \n - MODE = ['bcall', 'call', 'tenx', 'check', 'support', 'remove-background'] \n + MODE = ['bcall', 'call', 'tenx', 'check', 'support'] \n See https://github.com/caleblareau/mgatk/wiki for more details. """ @@ -104,9 +104,6 @@ def main(mode, input, output, name, mito_genome, ncores, if (mode == "call" or mode == "tenx" or mode == "bcall") and not skip_r: check_software_exists("R") check_R_packages(["data.table", "SummarizedExperiment", "GenomicRanges", "Matrix"]) - elif (mode=='remove-background'): - check_software_exists("R") - check_R_packages(["data.table", "SummarizedExperiment", "GenomicRanges", "Matrix", "optparse", "dplyr", "tidyr", "hdf5r"]) # Determine which genomes are available rawsg = glob.glob(script_dir + "/bin/anno/fasta/*.fasta") @@ -429,49 +426,7 @@ def main(mode, input, output, name, mito_genome, ncores, if(mode == "remove-background"): - of = output - check_software_exists("cellbender") - check_software_exists("Rscript") - - logf = open(output + "/logs" + "/bg.mgatk.log", 'a') - # prepare mgatk output for CellBender - # check that software exists - - cellbender_input_dir = output + "/cellbender_input" - make_folder(cellbender_input_dir) - # call prepare_cellbender.R to convert mgatk output to cellbender input for variable positions (specify cutoffs on --n_cells_conf_detected, --strand_correlation and --vmr as well?)" - Rcall = "Rscript " + script_dir + "/bin/R/prepare_cellbender.R -i " + output + "/final -o " + cellbender_input_dir - os.system(Rcall) - #print(Rcall) - click.echo(gettime() + "Prepared input for CellBender", logf) - - # run CellBender - # check that software exists - - cellbender_output_dir = output + "/cellbender_output" - make_folder(cellbender_output_dir) - - # pass other arguments to cellbender? - cellbender_cmd = "cellbender remove-background --input " + output + "/cellbender_input --output " + output + "/cellbender_output/" + name + ".h5 --expected-cells " + str(ncells_fg) + " --total-droplets-included " + str(ncells_bg) + " --fpr 0.01 --epochs 100 --low-count-threshold 1" - os.system(cellbender_cmd) - #print(cellbender_cmd) - click.echo(gettime() + "Finished CellBender run", logf) - - # convert CellBender output (here we could restrict the output to good cells only with the --cells argument, expects the barcodes.tsv file) - Rcall = "Rscript " + script_dir + "/bin/R/convert_cellbender_output.R -i " + output + "/final -c " + output + "/cellbender_output/" + name + ".h5 -o " + output + "/final/" + name + "_filtered.rds" - os.system(Rcall) - #print(Rcall) - click.echo(gettime() + "Converted CellBender output", logf) - - if keep_temp_files: - click.echo(gettime() + "Temporary files not deleted since --keep-temp-files was specified.", logf) - else: - shutil.rmtree(of + "/cellbender_input") - shutil.rmtree(of + "/cellbender_output") - click.echo(gettime() + "Intermediate files successfully removed.", logf) - - # Suspend logging - logf.close() + click.echo(gettime() + "mgatk remove-background is deprecated. please use mitobender from github.com/bihealth/mitobender") #-------- # Cleanup