diff --git a/galaxy/probmetab/Makefile b/galaxy/probmetab/Makefile new file mode 100755 index 0000000..fc4a922 --- /dev/null +++ b/galaxy/probmetab/Makefile @@ -0,0 +1,38 @@ +# USAGE: make [install|clean] +# make RELEASE=yes + + + +# -------- CAN BE MODIFIED -------- + +#PACKAGES=package1.tgz package2.tgz +PACKAGES=probmetab.tgz + +# ------------------------ + +all: $(PACKAGES) +ifeq ($(RELEASE),yes) + @echo "--Release version `date "+%Y%m%d"`--" +endif + +# -------- CAN BE MODIFIED -------- + +probmetab.tgz: probmetab.r lib.r ProbMetab.xml static README.txt export.class.table-color-graph.R + $(SEDVERSION) + tar --exclude=".svn" -zchf $@ $^ + +# ------------------------ + +RELEASE=no +SEDVERSION= +ifeq ($(RELEASE),yes) + SEDVERSION=sed -i "s/version=\"[0-9]\+\"/version=\"`date "+%Y%m%d"`\"/" *.xml; sed -i "s/version=\"[0-9]\+\"/version=\"`date "+%Y%m%d"`\"/" *.r ; sed -i "s/version=\"[0-9]\+\"/version=\"`date "+%Y%m%d"`\"/" *.txt +endif + +# ------------------------ + +install: $(PACKAGES) + cp $(PACKAGES) ~ + +clean: + rm $(PACKAGES) diff --git a/galaxy/probmetab/ProbMetab.xml b/galaxy/probmetab/ProbMetab.xml new file mode 100644 index 0000000..a600468 --- /dev/null +++ b/galaxy/probmetab/ProbMetab.xml @@ -0,0 +1,393 @@ + + + + Rscript + + + Wrapper function for ProbMetab R package. + + + probmetab.r xfunction probmetab + #if $acquisition_options.mode == "one": + mode_acquisition $acquisition_options.mode xa $acquisition_options.xa + ##if $acquisition_options.xsetnofill_options.option == "show": + ##xsetnofill $acquisition_options.xsetnofill_options.xsetnofill + ##end if + #else + mode_acquisition $acquisition_options.mode inputs_mode $acquisition_options.input_mode.option + #if $acquisition_options.input_mode.option== "two": + + image_pos $acquisition_options.input_mode.image_pos image_neg $acquisition_options.input_mode.image_neg + ##if $acquisition_options.input_mode.xsetnofill_options.option == "show": + ##xsetPnofill $acquisition_options.input_mode.xsetnofill_options.xsetPnofill xsetNnofill $acquisition_options.input_mode.xsetnofill_options.xsetNnofill + ##end if + ##else + ##image_combinexsannos $acquisition_options.input_mode.image_combinexsannos image_pos $acquisition_options.input_mode.image_pos + #end if + + #end if + + #if $option_toexclude.option == "show": + toexclude $option_toexclude.toexclude + #end if + allowMiss $allowMiss html $html kegg_db $kegg_db ppm_tol $ppm_tol + opt $opt corths $corths corprob $corprob pcorprob $pcorprob prob $prob + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (html) + + + + (html) + + + + + (acquisition_options['mode'] == 'one') + + + + (acquisition_options['mode'] == 'two') + + + (acquisition_options['mode'] == 'two') + + + (acquisition_options['mode'] == 'two') + + + + + + + + + + + + + +.. class:: infomark + +**Authors** Ricardo R. Silva et al. (2013) rsilvabioinfo@usp.br + + | If you use this tool, please cite: Silva RR and al.(2010). ProbMetab: an R package for Bayesian probabilistic annotation of LC-MS-based metabolomics.. + | For details about this tool, please go to http://labpib.fmrp.usp.br/methods/probmetab/ + +.. class:: infomark + +**Galaxy integration** Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABIMS TEAM, Station biologique de Roscoff. + + | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. + +.. class:: infomark + +**Contributors** Ricardo R. Silva rsilvabioinfo@usp.br, Yann Guitton yann.guitton@univ-nantes.fr and Jean-François Martin jean-francois.martin@toulouse.inra.fr + +--------------------------------------------------- + + +========= +ProbMetab +========= + +----------- +Description +----------- + +**What it does?** + +ProbMetab, an R package that promotes substantial improvement in automatic probabilistic liquid chromatography-mass spectrometry-based metabolome annotation. The inference engine core is based on a Bayesian model implemented to (i) allow diverse source of experimental data and metadata to be systematically incorporated into the model with alternative ways to calculate the likelihood function and (ii) allow sensitive selection of biologically meaningful biochemical reaction databases as Dirichlet-categorical prior distribution. Additionally, to ensure result interpretation by system biologists, we display the annotation in a network where observed mass peaks are connected if their candidate metabolites are substrate/product of known biochemical reactions. This graph can be overlaid with other graph-based analysis, such as partial correlation networks, in a visualization scheme exported to Cytoscape, with web and stand-alone versions. + + +**Details** + +ProbMetab assumes peak detection, retention time correction and peak grouping [4, 5] in order to +perform mass peak to compound assignment. + +Once the initial annotation for different forms of the same ion (adducts and isotopes), is defined, +one can seek for a non-redundant set of putative molecules (after charge and possible adduct +correction) for further inference of compound identity. + +Experience shows that standard mass rules for adduct search may lose peaks, and specific rule tables must be setup for a given +experimental condition. In order to address this issue, a flexible workflow, which allows users to +integrate different methods, would improve true molecular ions recovery. + +The ion annotation table has the following core information: exact mass of putative molecule with experimental error; isotopic pattern associated; adduct form associated, and the original reference to raw data. + + + + +----------------- +Workflow position +----------------- + + +**Upstream tools** + +========================= ========================================== ======= ========== +Name Output file Format Parameter +========================= ========================================== ======= ========== +xcms.annotate xset.annotate_POS (or NEG).RData RData RData file +========================= ========================================== ======= ========== + + +**General schema of the metabolomic workflow** + +.. image:: probmetab_workflow.png + + +----------- +Input files +----------- + ++---------------------------+------------+ +| Parameter : label | Format | ++===========================+============+ +|RData Input | RData | ++---------------------------+------------+ +|RData group step | RData | ++---------------------------+------------+ + + +---------- +Parameters +---------- + +**Allow Miss** + + +Optionally retrieves peaks with no evidence of adduct or isotope and annotate them as single charged molecules [M+/-H]. + + +**polarity** + + +Acquisition mode of mass spectrometer. + + +**Exclude samples** + + +Samples to be excluded of peak counting to non-annotated peak selection. + +**Calculate** + +**intervals** +A vector of SNR numerical intervals, to which different carbon offset should be added to predicted C-number. + +**offset** + +A vector of empirically estimated carbon offset to be added to predicted C-number. + +**massWeigth** +Is the contribution parameter of the probabilistic model. + +**likelihood** + +Which noise model to use, "erfc" to complementary error function, or "gaussian" to standard gaussian with two sd corresponding to the given p.p.m precision. + +**precision** + +Equipment mass accuracy, usually the same used in exact mass search. + +**KEGG database** + + +Select if you want to search on all KEGG organisms or multiple organisms (id1,id2,id3,...).By default,the value is KEGG which means searching on all KEGG organism. The list of KEGG IDs are available at "http://rest.kegg.jp/list/organism". + +**ppm.tol** + + +Parts per million mass tolerance allowed in the mass search (create.reactionMfunction). + +**HTML** + + +Logical, check if you want to generate a HTML ProbMetab report.This parameter uses the raw data to plot EICs and may be time consuming. + +**opt** + + +(reac2cor function) Correlation option, cor for correlation, and pcor for partial correlation. + +**corprob** + +(reac2cor function) Probability that the correlation is considered significant. + + +**pcorprob** + + +(reac2cor function) Probability that the partial correlation is considered significant. + +**corths** + + +(reac2cor function) Correlation intensity threshold. + +**prob** + +(export.class.table). How to calculate the probability to attribute a mass to a compound. Default is "count", which divide the number of times each identity was was attributed by the number of samples. Optionally the user could choose to use the mean of the probabilities of the identity, "mean". + + +**organismIdorganismId** + +(create.pathway.node.attributes function) KEGG organism id (http://www.kegg.jp/kegg/catalog/org_list.html) to filter possibibly pathwyas for known pathways for that organism. Only works for KEGG database for now. Default isNULL (all KEGG organisms). + + + +------------ +Output files +------------ + +Probmetab.RData + | Rdata file, that be used outside Galaxy in R. + +Probmetab.Analysis_Report_htmlSelect if you want to search on all KEGG organisms or multiple organisms (id1,id2,id3,...).By default,the value is KEGG which means searching on all KEGG organism. The list of KEGG IDs are available at http://rest.kegg.jp/list/organism + | A list with a matrix "classTable" with attributions and probabilities and indexes of selected masses from xcms peak table (HTML format). + +Probmetab.CytoScape_output_Attribute_List.tsv + | A list with a matrix "classTable" with attributions and probabilities and indexes of selected masses from xcms peak table, that can be used as Attribute table list in CytoScape (tsv format). + +Probmetab.Analysis_Report_EICs_plots + | Zip file containing the EIC plots (PNG format) of the metabolites that are listed in the HTML or tsv report. + +Probmetab.CytoScape_output.sif + | Sif format file that can be used in CytoScape to visualize the network. + + + + + + + + 10.1093/bioinformatics/btu019 + 10.1093/bioinformatics/btu813 + + + + diff --git a/galaxy/probmetab/README.txt b/galaxy/probmetab/README.txt new file mode 100644 index 0000000..bcaa095 --- /dev/null +++ b/galaxy/probmetab/README.txt @@ -0,0 +1,56 @@ +------------------ +ProbMetab Workflow +------------------ + +----------- +Description +----------- + +This is a Galaxy Wrapper version="1.0.0" for the ProbMetab tool. + +ProbMetab, an R package that promotes substantial improvement in automatic probabilistic liquid chromatography-mass spectrometry-based metabolome annotation. The inference engine core is based on a Bayesian model implemented to (i) allow diverse source of experimental data and metadata to be systematically incorporated into the model with alternative ways to calculate the likelihood function and (ii) allow sensitive selection of biologically meaningful biochemical reaction databases as Dirichlet-categorical prior distribution. Additionally, to ensure result interpretation by system biologists, we display the annotation in a network where observed mass peaks are connected if their candidate metabolites are substrate/product of known biochemical reactions. This graph can be overlaid with other graph-based analysis, such as partial correlation networks, in a visualization scheme exported to Cytoscap, , with web and stand-alone versions. + +----------- +Please cite +----------- + +Authors Ricardo R. Silva et al. (2013) + +Bioinformatics. 2014 May 1;30(9):1336-7. doi: 10.1093/bioinformatics/btu019. Epub 2014 Jan 17. "ProbMetab: an R package for Bayesian probabilistic annotation of LC-MS-based metabolomics". + +-------------- +Galaxy Wrapper +-------------- + +ABIMS TEAM Misharl Monsoor mmonsoor@sb-roscoff.fr + +Contributors Yann Guitton yann.guitton@univ-nantes.fr and Ricardo R. Silva. + + +------------ +Requirements +------------ + +You need to install the R package library ProbMetab. + +Download the package here: + +http://labpib.fmrp.usp.br/methods/probmetab/resources/ProbMetab_1.0.tar.gz + +-------------------------------------------------------------------- +Instructions for integration of the this tool into the workflow-system +Galaxy (http://getgalaxy.org) +-------------------------------------------------------------------- + +For installing the tool into your Galaxy installation, please do the following: + + - First of all, you need to have an account on the ABIMS server. Follow the link to abims website, where it is explained how to get an account on W4M: +http://abims.sb-roscoff.fr/faq + + - For a manual installation, copy the image from the static/images/ into the "galaxy-dist/static/images/" + + + +Last but not least, restart Galaxy. + + diff --git a/galaxy/probmetab/export.class.table-color-graph.R b/galaxy/probmetab/export.class.table-color-graph.R new file mode 100755 index 0000000..0c157ac --- /dev/null +++ b/galaxy/probmetab/export.class.table-color-graph.R @@ -0,0 +1,455 @@ +#' export.class.table +#' +#' Builds a matrix with the probability for all mass to candidate compounds +#' assignments, by averaging the number of assignments obtained by the gibbs sampler algorithm +#' or ordering the compound candidates with the likelihood matrix. +#' +#' @param gibbsL a list of attributions and probabilities from gibbs.samp function. +#' @param reactionM data.frame with compound annotation information. +#' @param molIon non redundant ion annotation. +#' @param probM optionally to gibbsL, a matrix of likelihoods. +#' @param html logical, indicating whether a html file should be generated. This parameter uses the raw data to plot EICs and may be time consuming. +#' @param filename html file name, the default is "test". +#' @param burnIn how many samples of the gibbs sampler should be discarded. +#' @param linkPattern which pattern should be linked to compound id, for now we have +#' implemented "kegg", "pubchem" and "chebi" patterns. +#' @param m.test statistical test to compare mean differences. This option +#' is only available to single acquisition mode analysis, with options +#' "t.test" and "anova". +#' @param class1 if the m.test is "t.test" first class to compare in the test, +#' according with xcmsSet phenoData. +#' @param class2 if the m.test is "t.test" second class to compare in the test, +#' according with xcmsSet phenoData. +#' @param norm logical, if TRUE performs median normalization from (Anal. Chem. 2011, 83, 5864-5872). +#' @param DB data.frame table used to search compounds, with the field name to be incorporated in the html table. +#' @param prob how to calculate the probability to attribute a mass to a compound. +#' Default is "count", which divide the number of times each identity was +#' was attributed by the number of samples. Optionally the user could +#' choose to use the mean of the probabilities of the identity, "mean". +#' @return A list with a matrix "classTable" with attributions and probabilities and +#' indexes of selected masses from xcms peak table. +#' +#' @export + +export.class.table <- function(gibbsL=NULL, reactionM, molIon=NULL, probM=NULL, html=FALSE, filename="test", burnIn=3000, linkPattern="kegg", m.test="none", class1=NULL, class2=NULL, norm=FALSE, DB, prob="count", colorplot=FALSE, addLink=NULL) { + + plotEIC <- function (xcmsObject, figidx, pngidx, colorplot, mode=NULL) { + dir.create(paste(filename,"_fig",sep="")) + gt<-groups(xcmsObject) + if(colorplot==TRUE){ + gt2 <- gt[figidx,] + rgt <- gt2[,c("rtmin","rtmax")] + rgt[,1] <- rgt[,1]-100 + rgt[,2] <- rgt[,2]+100 + #require(doMC) + #registerDoMC() + #system.time( + #foreach(i=1:nrow(gt2)) %dopar% { + for(i in 1:nrow(gt2)){ + groupidx1 <- which(gt[,"rtmed"] > rgt[i,1] & gt[,"rtmed"] < rgt[i,2] & gt[,"mzmed"]> gt2[i,"mzmed"]-0.1 & gt[,"mzmed"]< gt2[i,"mzmed"]+0.1) + eiccor <- getEIC(xcmsObject, groupidx = groupidx1) + png(paste(filename, "_fig/", sprintf("%003d", i), ".png", sep="")) + plot(eiccor, xcmsObject, groupidx = 1) + dev.off() + } + } else { + gt <- gt[figidx,] + rgt <- gt[,c("rtmin","rtmax")] + rgt[,1] <- rgt[,1]-100 + rgt[,2] <- rgt[,2]+100 + + eics <- getEIC(xcmsObject, mzrange=gt, rtrange =rgt, groupidx = 1:nrow(gt)) + png(file.path(paste(filename, "_fig/%003d.png", sep="")), height=768, width=1024) + #png(file.path(paste(filename, "_fig/", pngidx, sep="")), h=768, w=1024) + plot(eics, xcmsObject) + dev.off() + } + if(!is.null(mode)) { + pngs <- dir(paste(filename, "_fig/", sep="")) + if(length(grep("pos|neg" , pngs))) pngs <- pngs[-grep("pos|neg" , pngs)] + opng <- as.numeric(sub(".png","", pngs)) + pngs <- pngs[order(opng)] + name1 <- paste(filename, "_fig/",pngs, sep="") + name2 <- paste(filename, "_fig/",pngidx, mode, ".png", sep="") + for(i in 1:length(name1)) file.rename(name1[i], name2[i]) + } + + } + allion <- molIon$molIon[molIon$molIon[,"isotope"]==0,] + ReactMatrix <- reactionM[reactionM[,5]!="unknown",] + x <- apply(unique(ReactMatrix[,c(2, 3)]), 2, as.numeric) # Have to look for all pairs + y <- as.numeric(ReactMatrix[,4]) + prob_mean_ma <- matrix(0, nrow = length(y), ncol = nrow(x)) +# z_average <- matrix(0, nrow = length(y), ncol = length(x)) + + if (!is.null(gibbsL)){ + prob_table <- gibbsL$prob_table[,-c(1:burnIn)] + class_table <- gibbsL$class_table[,-c(1:burnIn)] + #indList <- tapply(1:nrow(ReactMatrix), as.numeric(ReactMatrix[,1]), function(x) x) + coords <- tapply(1:nrow(ReactMatrix), ReactMatrix[,"molIonID"], function(x) x) + coords2 <- unlist(lapply(coords, function(x) rep(x[1], length(x)))) + indList <- coords[order(unique(coords2))] + fillMatrix <- function(j,i) { + idp <- which(class_table[i,] == j) + if(prob=="count") prob_mean_ma[j,i] <<- length(idp)/ncol(class_table) + if(prob=="mean") prob_mean_ma[j,i] <<- mean(prob_table[i,idp]) + } + + + for ( i in 1:nrow(x) ) { + + sapply(indList[[i]], fillMatrix, i) + } + if(sum(prob_mean_ma=="NaN")) prob_mean_ma[prob_mean_ma=="NaN"] <- 0 +# for ( i in 1:nrow(x) ) { +# for ( j in 1:length(y) ) { +# idp <- which(class_table[i,] == j) +# prob_mean_ma[j,i] <- mean(prob_table[i,idp]) +# # this is an alternative way to calculate the probabilities, should try latter, and compare results +# #prob_mean_ma[j,i] <- length(idp)/ncol(class_table) +# if ( prob_mean_ma[j,i] == "NaN" ) prob_mean_ma[j,i] <- 0 +# } +# # do I still need this matrix? +# k=which(prob_mean_ma[,i]==max(prob_mean_ma[,i])) +# z_average[k[1],i]=1 +# } + } + else { + prob_mean_ma <- probM + } + # think about natural probabilities + # prob_mean_ma[prob_mean_ma[,1]!=0,1]/sum(prob_mean_ma[prob_mean_ma[,1]!=0,1]) + prob_mean_ma <- apply(prob_mean_ma, 2, function(x){ x[x!=0] <- x[x!=0]/sum(x[x!=0]); return(x)} ) + + # create a dir to figures + lpattern <- function(type){ + switch(type, + kegg = "http://www.genome.jp/dbget-bin/www_bget?", + chebi = "http://www.ebi.ac.uk/chebi/searchId.do;EFB7DFF9E88306BBCD6AB78B32664A85?chebiId=", + pubchem = "http://www.ncbi.nlm.nih.gov/pccompound/?term=" + ) + } + linkURL <- lpattern(linkPattern) + fig <- paste("file://", getwd(), paste("/",filename,"_fig/",sep=""), sep="") + if(!is.null(molIon$cameraobj)) { + figidx <- c("") + coords <- gsub("(^\\d)","X\\1",rownames(molIon$cameraobj@xcmsSet@phenoData)) + # experimental! Which set of characters???? + coords <- gsub("-|\\,|~","\\.",coords) + coords <- gsub("\\s+","\\.",coords) + peaklist <- getPeaklist(molIon$cameraobj) + rpeaklist <- peaklist[,c("mz","rt","isotopes","adduct","pcgroup")] + } + else { + figidx <- c("","") + coordsP <- gsub("(^\\d)","X\\1",rownames(molIon$pos@xcmsSet@phenoData)) + # experimental! Which set of characters???? + coordsP <- gsub("-|\\,|~","\\.",coordsP) + coordsP <- gsub("\\s+","\\.",coordsP) + coordsN <- gsub("(^\\d)","X\\1",rownames(molIon$neg@xcmsSet@phenoData)) + # experimental! Which set of characters???? + coordsN <- gsub("-|\\,|~","\\.",coordsN) + coordsN <- gsub("\\s+","\\.",coordsN) + coords <- coordsP + if(length(coordsP)!=length(coordsN)) cat("\n Warning: The number of samples are different\n") + + peaklistP <- getPeaklist(molIon$pos) + rpeaklistP <- peaklistP[,c("mz","rt","isotopes","adduct","pcgroup")] + peaklistN <- getPeaklist(molIon$neg) + rpeaklistN <- peaklistN[,c("mz","rt","isotopes","adduct","pcgroup")] + } + +# if(sum(is.na(peaklist))) { +# cat("\nWarning: NAs Found in peaklist\n\nSubstituting for \"ones\"\n") +# na.ids <- which(is.na(peaklist),arr.ind=TRUE) +# for(l in 1:nrow(na.ids)){ +# peaklist[na.ids[l,][1], na.ids[l,][2]] <- 1 +# } +# } +# + + ans <- matrix("", nrow=1, ncol=7+length(coords)) + unq <- unique(ReactMatrix[,2:3]) + for (i in 1:nrow(unq)) { + coord <- which(ReactMatrix[,2]==unq[i,1] & ReactMatrix[,3]==unq[i,2]) + coord2 <- which(allion[,2]==unq[i,1] & allion[,1]==unq[i,2]) + # idx2 <- unique(which(allion[,1] %in% reactionM[reactionM[,5]=="unknown",2])) + # work with the higher intensities for a given ion annotation, not necessarily the right one + + if(!is.null(molIon$cameraobj)) { + idx <- as.vector(unlist(sapply(allion[coord2,"trace"], + function(x) { + x <- as.matrix(x) + raw <- strsplit(x,";")[[1]] + mraw <- apply(peaklist[raw, coords], 1, mean) + raw[which.max(mraw)] + } + + ) + ) + ) + + idx <- unique(idx) + figidx <- append(figidx,idx) + } + else { + idx <- c() + + for(l in 1:nrow( allion[coord2,c("trace","comb")])) { + x <- as.matrix(allion[coord2,c("trace","comb")][l,]) + raw <- strsplit(x[1],";")[[1]] + if(x[2]!="neg"){ + mraw <- apply(peaklistP[raw, coordsP], 1, mean, na.rm=TRUE) + } + else { + + mraw <- apply(peaklistN[raw, coordsN], 1, mean, na.rm=TRUE) + } + idx <- c(idx, raw[which.max(mraw)]) + } + + + idx <- unique(idx) + figidx <- rbind(figidx,c(idx,allion[coord2,"comb"][1])) + } + #figidx <- append(figidx,strsplit(allion[coord2,5], ";")[[1]][1]) + ans1 <- matrix("", nrow=length(coord), ncol=7+length(coords)) + ans1[,2]<-as.matrix(ReactMatrix[coord,5]) + prob <- as.matrix(prob_mean_ma[coord, i]) # need to change and compare a pair of mass/rt + # number figs + if ( i >= 100 ) { ans1[1,6]=i } + else { if ( i >= 10 ) { ans1[1,6]=paste(0,i, sep="") } else { ans1[1,6]=paste("00",i, sep="") } } + + if (sum(prob)>0) { + #prob <- prob/sum(prob) + o <- order(prob, decreasing=TRUE) + ans1[,-6] <- ans1[o,-6] + ans1 <- matrix(ans1, nrow=length(o)) + ans1[1,1] <- ReactMatrix[coord[1],3] + #ans1[,3] <- round(prob/min(prob[prob!=0]), 3)[o] + ans1[,3] <- round(prob, 3)[o] + if (length(prob[prob!=0])>1) { + entropy <- -sum(prob[prob!=0]*log(prob[prob!=0], length(prob[prob!=0]))) + } + else { entropy <- 0 + } + ans1[1,4] <- round(entropy, 3) + } + else { + ans1[1,1] <- ReactMatrix[coord[1],3] + ans1[1,3] <- "undef" + } + + if(!is.null(molIon$cameraobj)) { + ans1[1,7] <- apply(rpeaklist[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1,8:ncol(ans1)] <- as.matrix(peaklist[idx, coords]) + } + else { + if(allion[coord2,"comb"]=="pos"|allion[coord2,"comb"]=="both") { + ans1[1,7] <- apply(rpeaklistP[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1,8:ncol(ans1)] <- as.matrix(peaklistP[idx, coordsP]) + } + else { + ans1[1,7] <- apply(rpeaklistN[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1,8:ncol(ans1)] <- as.matrix(peaklistN[idx, coordsN]) + } + } + ans <- rbind(ans, as.matrix(ans1)) + } + ans <- ans[-1,] + # this option should change according with the bank + if(html) { + nid <- unlist(sapply(ans[,2], function(x) which(DB$id==x))) + #ans[,2] <- as.character(DB$name[nid]) + } + unk <- reactionM[reactionM[,5]=="unknown",] + ans1 <- matrix("", nrow=nrow(unk), ncol=7+length(coords)) + ans1[,1] <- unk[,3] + ans1[,2] <- unk[,5] + for(j in 1:nrow(ans1)) { + i <- j + max(as.numeric(ans[,6]),na.rm=TRUE) + if ( i >= 100 ) { ans1[j,6]=i } + else { if ( i >= 10 ) { ans1[j,6]=paste(0,i, sep="") } else { ans1[j,6]=paste("00",i, sep="") } } + } + # this step try to recover ids of ion annotation for masses without database annotation + idx2 <- c(); #for(m in 1:nrow(allion)) if(sum(allion[m,2]==as.numeric(unk[,2])) & sum(allion[m,1]==as.numeric(unk[,3]))) idx2 <- append(idx2, m) + # temp changes made 03/03/2014 have to check carefuly + lidx <- lapply(1:nrow(allion), function(m) which(allion[m,2]==unk[,2] & allion[m,1]==unk[,3])) + idx2 <- which(lapply(lidx, length)>0) + + if(!is.null(molIon$cameraobj)) { + idx <- as.vector(unlist(sapply(allion[idx2,"trace"], + function(x) { + x <- as.matrix(x) + raw <- strsplit(x,";")[[1]] + mraw <- apply(peaklist[raw, coords], 1, mean) + raw[which.max(mraw)] + } + + ) + ) + ) + } + + else { + # don't know what happened here with apply + idx <- c() + + for(i in 1:nrow( allion[idx2,c("trace","comb")])) { + x <- as.matrix(allion[idx2,c("trace","comb")][i,]) + raw <- strsplit(x[1],";")[[1]] + if(x[2]!="neg"){ + mraw <- apply(peaklistP[raw, coordsP], 1, mean, na.rm=TRUE) + } + else { + + mraw <- apply(peaklistN[raw, coordsN], 1, mean, na.rm=TRUE) + } + idx <- c(idx, raw[which.max(mraw)]) + } + + + + tmpidx <- cbind(idx,allion[idx2,"comb"]) + } + if(!is.null(molIon$cameraobj)) { + ans1[,7] <- apply(rpeaklist[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[,8:ncol(ans1)] <- as.matrix(peaklist[idx, coords]) + } + else { + idxP <- tmpidx[tmpidx[,2]!="neg",1] + ans1[1:length(idxP),7] <- apply(rpeaklistP[idxP,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1:length(idxP),8:ncol(ans1)] <- as.matrix(peaklistP[idxP, coordsP]) + idxN <- tmpidx[tmpidx[,2]=="neg",1] + ans1[(length(idxP)+1):nrow(ans1),7] <- apply(rpeaklistN[idxN,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[(length(idxP)+1):nrow(ans1),8:ncol(ans1)] <- as.matrix(peaklistN[idxN, coordsN]) + } + ans <- rbind(ans, as.matrix(ans1)) + + if(!is.null(molIon$cameraobj)) { + figidx <- c(figidx,idx) + figidx <- as.numeric(figidx[-1]) + } + else { + figidx <- rbind(figidx,tmpidx) + allidx <- figidx[-1,] + allidx <- cbind(allidx, ans[ans[,6]!="",6]) + figidx <- as.numeric(figidx[-1,1]) + } + + + if(m.test=="none") { + testname <- "none" + #testname <- "Formula" + #ans[ans[,2]!="unknown",][,5] <- as.character(DB$formula[nid]) + } + if(m.test=="t.test") { + normalize.medFC <- function(mat) { + # Perform median fold change normalisation + # X - data set [Variables & Samples] + medSam <- apply(mat, 1, median) + medSam[which(medSam==0)] <- 0.0001 + mat <- apply(mat, 2, function(mat, medSam){ + medFDiSmpl <- mat/medSam + vec<-mat/median(medFDiSmpl) + return(vec) + }, medSam) + return (mat) + } + # this piece of code was copied from xcms + pval <- function(X, classlabel, teststat) { + + n1 <- rowSums(!is.na(X[,classlabel == 0])) + n2 <- rowSums(!is.na(X[,classlabel == 1])) + A <- apply(X[,classlabel == 0], 1, sd, na.rm=TRUE)^2/n1 ## sd(t(X[,classlabel == 0]), na.rm = TRUE)^2/n1 + B <- apply(X[,classlabel == 1], 1, sd, na.rm=TRUE)^2/n2 ## sd(t(X[,classlabel == 1]), na.rm = TRUE)^2/n2 + df <- (A+B)^2/(A^2/(n1-1)+B^2/(n2-1)) + + pvalue <- 2 * (1 - pt(abs(teststat), df)) + invisible(pvalue) + } + + c1 <- grep(class1, molIon$cameraobj@xcmsSet@phenoData[,1]) + c2 <- grep(class2, molIon$cameraobj@xcmsSet@phenoData[,1]) + testclab <- c(rep(0, length(c1)), rep(1, length(c2))) + testval <- groupval(molIon$cameraobj@xcmsSet, "medret", "into") + if(norm) testval <- normalize.medFC(testval) + tstat <- mt.teststat(testval, testclab) + pvalue <- pval(testval, testclab, tstat) + +# +# rport <- diffreport(molIon$cameraobj@xcmsSet, class1=class1, class2= class2, sortpval=FALSE) +# ans[ans[,6]!="",5] <- rport[figidx, "pvalue"] + ans[ans[,6]!="",5] <- pvalue[figidx] + testname <- "t.test p-value" + } + if(m.test=="anova"){ + class <- molIon$cameraobj@xcmsSet@phenoData + getPvalue <- function(dataidx) { + aov.data <- data.frame(resp=as.numeric(peaklist[dataidx,coords]), class=class) + anova(aov(resp~class, aov.data))$Pr[1] + } + testname <- "anova p-value" + ans[ans[,6]!="",5] <- sapply(figidx, getPvalue) + } + + header <- matrix(c("Proposed Mass","Most probable Compound","Probability","Entropy", testname,"EIC-plot", "Ion annotation",coords), nrow=1 , ncol=7+length(coords) ) + ans <- rbind(header, ans) + + + # additional field + # ans <- cbind(ans[,1:2], ans[,2], ans[,3:ncol(ans)]) + #ans[ans[,3]!="unknown",][-1,3] <- as.character(DB$sbml.id[nid]) + + if(html) { + #require(hwriter) + ansb <- ans + ans[ans[,2]!="unknown",][-1,2] <- as.character(DB$name[nid]) + if(linkPattern=="pubchem") ansb <- ans + + hyper=matrix(paste(linkURL, ansb[-1,2], sep=""),ncol=1 ) + if(!is.null(molIon$cameraobj)) { + hyper1=matrix(paste(fig, ans[-1,6],".png", sep=""),ncol=1 ) + } + else { + hyper1 <- ans[-1,6] + hyper1[ans[-1,6]!=""][allidx[,2]!="neg"] <- paste(hyper1[ans[-1,6]!=""][allidx[,2]!="neg"], "pos", sep="") + hyper1[ans[-1,6]!=""][allidx[,2]=="neg"] <- paste(hyper1[ans[-1,6]!=""][allidx[,2]=="neg"], "neg", sep="") + hyper1=matrix(paste(fig, hyper1,".png", sep=""),ncol=1 ) + } + p=openPage(paste(filename,".html",sep="")) + ans2 <- ans[,1:7] + link <- cbind(matrix(NA,nrow(ans2),1),rbind(NA,hyper),matrix(NA,nrow(ans2),3),rbind(NA,hyper1),matrix(NA,nrow(ans2),1)) + # This block is responsible to add as many columns as the user + # wants + if(!is.null(addLink)){ + for(l in 1:length(addLink$link)) { + link <- cbind(link, rbind(NA, addLink[[l]])) + } + for(a in 1:length(addLink$ans)) { + ans2 <- cbind(ans2,addLink$ans[[a]]) + #colnames(ans2)[7+a] <- addLink$ans[[a]][1] + } + } + hwrite(ans2, p,row.bgcolor='#ffdc98', link=link ) + closePage(p) + if(!is.null(molIon$cameraobj)) { + plotEIC(molIon$cameraobj@xcmsSet, figidx, ans[ans[,6]!="",6][-1], colorplot=colorplot) + } + else { + dataidxP <- as.numeric(allidx[allidx[,2]!="neg",1]) + pngidxP <- allidx[allidx[,2]!="neg",3] + plotEIC(molIon$pos@xcmsSet, dataidxP, pngidxP, "pos", colorplot=colorplot) + dataidxN <- as.numeric(allidx[allidx[,2]=="neg",1]) + pngidxN <- allidx[allidx[,2]=="neg",3] + plotEIC(molIon$neg@xcmsSet, dataidxN, pngidxN, "neg", colorplot=colorplot) + } + + } + else { + ansb <- ans + } + colnames(ansb) <- ansb[1,] + ansb <- ansb[-1,] + return(list(classTable=ansb, figidx=figidx)) +} diff --git a/galaxy/probmetab/lib.r b/galaxy/probmetab/lib.r new file mode 100644 index 0000000..b61cccd --- /dev/null +++ b/galaxy/probmetab/lib.r @@ -0,0 +1,323 @@ +# lib.r ProbMetab version="1.0.0" +# Author: Misharl Monsoor ABIMS TEAM mmonsoor@sb-roscoff.fr +# Contributors: Yann Guitton and Jean-francois Martin + + +##Main probmetab function launch by the Galaxy ProbMetab wrapper +probmetab = function(xa, xaP, xaN, variableMetadata, variableMetadataP, variableMetadataN, listArguments){ + ##ONE MODE ACQUISITION## + if(listArguments[["mode_acquisition"]]=="one") { + comb=NULL + + #Get the polarity from xa object + polarity=xa@polarity + #SNR option + if ("xsetnofill" %in% names(listArguments)) { + load(listArguments[["xsetnofill"]]) + xsetnofill=xset + } + else{ + xsetnofill=NULL + } + #Exclude samples + if ("toexclude" %in% names(listArguments)) { + toexclude=listArguments[["toexclude"]] + } + else { + toexclude=NULL + } + ionAnnot=get.annot(xa, polarity=polarity, allowMiss=listArguments[["allowMiss"]],xset=xsetnofill,toexclude=toexclude) + comb=NULL + } + + ##TWO MODES ACQUISITION## + #Mode annotatediffreport + else if(listArguments[["inputs_mode"]]=="two"){ + ##Prepare the objects that will be used for the get.annot function + comb=1 + + + xsetPnofill=NULL + xsetNnofill=NULL + # TODO: a reactiver + #if ("xsetPnofill" %in% names(listArguments)) { + # load(listArguments[["xsetPnofill"]]) + # xsetPnofill=xset + #} + #if ("xsetNnofill" %in% names(listArguments)) { + # load(listArguments[["xsetNnofill"]]) + # xsetNnofill=xset + #} + # include CAMERA non-annotated compounds, and snr retrieval + # comb 2+ - used on Table 1 + ionAnnotP2plus = get.annot(axP, allowMiss=listArguments[["allowMiss"]], xset=xsetPnofill,toexclude=listArguments[["toexclude"]]) + ionAnnotN2plus = get.annot(axN, polarity="negative", allowMiss=listArguments[["allowMiss"]], xset=xsetNnofill,toexclude=listArguments[["toexclude"]]) + ionAnnot = combineMolIon(ionAnnotP2plus, ionAnnotN2plus) + print(sum(ionAnnot$molIon[,3]==1)) + print(sum(ionAnnot$molIon[,3]==0)) + write.table(ionAnnot[1], sep="\t", quote=FALSE, row.names=FALSE, file="CombineMolIon.tsv") + #Merge variableMetadata Negative and positive acquisitions mode + + + #Mode combinexsannos TODO bug avec tableau issus de combinexsannos + #else { + #load(listArguments[["image_combinexsannos"]]) + #image_combinexsannos=cAnnot + ##Prepare the objects that will be used for the combineMolIon function + #load(listArguments[["image_pos"]]) + #image_pos=xa + #ionAnnot=combineMolIon(peaklist=cAnnot, cameraobj=image_pos, polarity="pos") + #} + + } + + ##DATABASE MATCHING## + if (listArguments[["kegg_db"]]=="KEGG"){ + DB=build.database.kegg(orgID = NULL) + } + else{ + table_list <<- NULL + ids=strsplit(listArguments[["kegg_db"]],",") + ids=ids[[1]] + if(length(ids)>1){ + for(i in 1:length(ids)){ + table_list[[i]] <- build.database.kegg(ids[i]) + } + db_table=do.call("rbind",table_list) + DB=unique(db_table) + } + else{ + DB=build.database.kegg(listArguments[["kegg_db"]]) + } + } + #Matching des mass exactes mesurées avec les masses des compounds KEGG (pas M+H ou M-H) + reactionM = create.reactionM(DB, ionAnnot, ppm.tol=listArguments[["ppm_tol"]]) + ##PROBABILITY RANKING## + # number of masses with candidates inside the fixed mass window + # and masses with more than one candidate + length(unique(reactionM[reactionM[,"id"]!="unknown",1])) + sum(table(reactionM[reactionM[,"id"]!="unknown",1])>1) + #if (listArguments[["useIso"]]){ + #BUG TODO + # Calculate the ratio between observed and theoretical isotopic patterns. + # If you don't have an assessment of carbon offset to carbon number prediction + # skip this step and use the reactionM as input to weigthM function. + #isoPatt <­ incorporate.isotopes(comb2plus, reactionM, , samp=12:23, DB=DB) + # calculate the likelihood of each mass to compound assignment using mass accuracy,and isotopic pattern, when present + #wl <­ weightM(isoPatt,intervals=seq(0,1000,by=500), offset=c(3.115712, 3.434146, 2.350798)) + + #isoPatt=incorporate.isotopes(ionAnnot, reactionM,comb=comb,var=listArguments[["var"]],DB=DB) + + #wl =weightM(reactionM, useIso=true) + #} + #else { + #wl =weightM(reactionM, useIso=FALSE) + #} + wl =weightM(reactionM, useIso=FALSE) + w = design.connection(reactionM) + # Probability calculations + x = 1:ncol(wl$wm) + y = 1:nrow(wl$wm) + conn = gibbs.samp(x, y, 5000, w, wl$wm) + ansConn = export.class.table(conn, reactionM, ionAnnot, DB=DB,html=listArguments[["html"]],filename="AnalysisExample",prob=listArguments[["prob"]]) + if(listArguments[["html"]]){ + #Zip the EICS plot + system(paste('zip -r "Analysis_Report.zip" "AnalysisExample_fig"')) + } + + # calculate the correlations and partial correlations and cross reference then with reactions + mw=which(w==1,arr.ind=TRUE) + #reac2cor function : Use the intensity of putative molecules in repeated samples to calculate correlations and partial + #correlation in a user defined threshold of false discovery rate for significance testing. After the + #correlation test the function also overlay significant correlations with all putative reactions between + #two masses. + #It generates a list of estimated correlations and reactions. + corList=reac2cor(mw,ansConn$classTable,listArguments[["opt"]],listArguments[["corths"]],listArguments[["corprob"]],listArguments[["pcorprob"]]) + ans=list("ansConn"=ansConn,"corList"=corList) + #Generate the siff table for CytoScape + cytoscape_output(corList,ansConn) + + + #Execute the merge_probmetab function to merge the variableMetadata table and annotations from ProbMetab results + + if(listArguments[["mode_acquisition"]]=="one") { + #Retrocompatibility with previous annotateDiffreport variableMetadata dataframe (must replace mzmed column by mz, and rtmed by rt) + names(variableMetadata)[names(variableMetadata)=="mzmed"] <- "mz" + names(variableMetadata)[names(variableMetadata)=="rtmed"] <- "rt" + variableM=merge_probmetab(variableMetadata, ansConn) + write.table(variableM, sep="\t", quote=FALSE, row.names=FALSE, file="variableMetadata.tsv") + } else if (listArguments[["mode_acquisition"]]=="two") { + #Retrocompatibility with previous annotateDiffreport variableMetadata dataframe (must replace mzmed column by mz, and rtmed by rt) + names(variableMetadataP)[names(variableMetadata)=="mzmed"] <- "mz" + names(variableMetadataP)[names(variableMetadata)=="rtmed"] <- "rt" + names(variableMetadataN)[names(variableMetadata)=="mzmed"] <- "mz" + names(variableMetadataN)[names(variableMetadata)=="rtmed"] <- "rt" + variableMP=merge_probmetab(variableMetadataP, ansConn) + write.table(variableMP, sep="\t", quote=FALSE, row.names=FALSE, file="variableMetadata_Positive.tsv") + variableMN=merge_probmetab(variableMetadataN, ansConn) + write.table(variableMN, sep="\t", quote=FALSE, row.names=FALSE, file="variableMetadata_Negative.tsv") + } + + return(ans) + +} + +##Function that generates a siff table for CytoScape +cytoscape_output=function(corList,ansConn){ + signif_cor=as.data.frame(corList$signif.cor) + classTable=as.data.frame(ansConn$classTable) + #Siff table + siff_table=cbind(signif_cor["node1"],signif_cor["cor"],signif_cor["node2"]) + #attribute table output for Cytoscape + + ## START Code part from the export2cytoscape function of ProbMetab written by Ricardo R. Silva + for (i in 1:nrow(classTable)) if (classTable[i, 1] == ""){ + classTable[i, c(1, 4, 6, 7)] <- classTable[i - 1, c(1, 4, 6, 7)] + } + msel <- as.matrix(classTable[, 1:7]) + msel <- cbind(msel[, 6], msel[,-6]) + colnames(msel)[1] <- "Id" + msel[, 1] <- sub("^\\s+", "", msel[, 1]) + colnames(msel)[1] <- "Id" + ids <- unique(msel[, 1]) + attrMatrix <- matrix("", nrow = length(ids), ncol = ncol(msel)-1) + for (i in 1:length(ids)) { + attrMatrix[i, 1] <- unique(msel[msel[, 1] == ids[i], + 2]) + attrMatrix[i, 2] <- paste("[", paste(msel[msel[, + 1] == ids[i], 3], collapse = ", "), "]", sep = "") + attrMatrix[i, 3] <- paste("[", paste(msel[msel[, + 1] == ids[i], 4], collapse = ", "), "]", sep = "") + attrMatrix[i, 4] <- unique(msel[msel[, 1] == ids[i], + 5]) + attrMatrix[i, 5] <- paste("[", paste(msel[msel[, + 1] == ids[i], 6], collapse = ", "), "]", sep = "") + attrMatrix[i, 6] <- unique(msel[msel[, 1] == ids[i], + 7]) + } + ids <- as.numeric(unique(msel[, 1])) + attrMatrix <- cbind(ids, attrMatrix) + colnames(attrMatrix) <- colnames(msel) + ## END Code part from the export2cytoscape function of ProbMetab writieen by Ricardo R. Silva + write.table(attrMatrix, sep="\t", quote=FALSE, row.names=FALSE, file="Analysis_Report.tsv") + write.table(siff_table, sep="\t", quote=FALSE, row.names=FALSE, file="sif.tsv") + + return(attrMatrix) +} + +##Functions written by Jean-François Martin + +deter_ioni <- function (aninfo, pm) +{ + # determine ionisation in ProbMetab result file, used in function merge_probmetab + # input : for 1 ion, aninfo = string with m/z rt and CAMERA annotation from ProbMetab result file + # if the difference between m/z and the probmetab proposed mass is ~1 we use the sign (positive or negative) of this diference + # to define the type of ionisation + # If adduct or fragment was detected, therefore diff >>1 and so, we search for substring "+" ou "2+" ou "3+" ou "-"... + # to define the type of ionisation + # aninfo : vecteur of character resulting of the parsing(sep="#") of the probmetab annotation + if (round(abs(as.numeric(aninfo[1]) - pm),0) ==1) { + if (as.numeric(aninfo[1]) - pm <0) {esi <- "n"} else {esi <- "p"} + } else + if (!is.na(aninfo[4])) { + anstr <- aninfo[4] + # cat(anstr) + if ((grepl("]+",anstr,fixed=T)==T) || (grepl("]2+",anstr,fixed=T)==T) || (grepl("]3+",anstr,fixed=T)==T)) { esi <- "p"} + else + if ((grepl("]-",anstr,fixed=T)==T) || (grepl("]2-",anstr,fixed=T)==T) || (grepl("]3-",anstr,fixed=T)==T)) { esi <- "n"} + # cat(" ioni ",esi,"\n") + } else + { esi <- "u"} + + return(esi) +} + + +merge_probmetab <- function(metaVar,ansConn) { + ## Parse ProbMetab information result file and merge in variable_metaData initial file + ## inputs : + ## metaVar : data.frame of metadataVariable input of probmetab function + ## ansConn : data.frame of ProbMetab result + ## output : dataframe with Probmetab results merge with variableMetadata + ## Constante + ## iannot : indice de la colonne annotation dans le resultat de probMetab + iannot <- 4 + + ## definition of an unique identification of ions mz with 3 decimals and rt(sec) with 1 decimal to avoid + ## duplicate ions name in the diffreport result file + ions <- paste ("M",round(metaVar$mz,3),"T",round(metaVar$rt,1),sep="") + metaVar <- data.frame(ions,metaVar) + + ###### Result data.frame from ProbMetab result list + an_ini <- ansConn$classTable + + ## Suppression of rows without mz and rt or unknown and columns of intensities + ## COLUMNS SUBSCRIPTS HAVE TO BE CHECKED WITh DIFFERENT RESULTS FILES + an <- an_ini[(an_ini[,2]!="unknown"),c(1,2,3,7)] + ## initialisation of vectors receiving the result of the parse of the column annotation (subscrip iannot) + mz <- rep(0,dim(an)[1]) + rt <- rep(0,dim(an)[1]) + propmz <- rep(0,dim(an)[1]) + ioni <- rep("u",dim(an)[1]) + + ## parse the column annotation and define ionisation mode + for (i in 1:dim(an)[1]) { + if (an[i,1] != "") { + info_mzrt <- unlist(strsplit(an[i,iannot],"#")) + propmz[i] <- as.numeric(an[i,1]) + mz[i] <- as.numeric(info_mzrt[1]) + rt[i] <- as.numeric(info_mzrt[2]) + ioni[i] <- deter_ioni(info_mzrt,as.numeric(an[i,1])) + } + else { + propmz[i] <- as.numeric(propmz[i-1]) + mz[i] <- as.numeric(mz[i-1]) + rt[i] <- as.numeric(rt[i-1]) + ioni[i] <- ioni[i-1] + } + } + + ## definition of an unique identification of ions : mz with 3 decimals and rt(sec) with 1 decimal + ## The same as for the metadataVariable data.frame to match with. + ions <- paste ("M",round(mz,3),"T",round(rt,1),sep="") + an <- data.frame(ions,ioni,propmz,mz,rt,an) + + ## transposition of the different probmetab annotations which are in different rows in the initial result data.frame + ## on only 1 row separated with a ";" + li <- as.matrix(table(an$propmz)) + li <- data.frame(dimnames(li)[1],li) + dimnames(li)[[2]][1] <- "propmz" + ions <- rep("u",dim(li)[1]) + propmz <- rep(0,dim(li)[1]) + mpc <- rep("c",dim(li)[1]) + proba <- rep("p",dim(li)[1]) + c <- 0 + while (c < dim(li)[1]) { + c <- c + 1 + suban <- an[an$propmz==li[c,1],] + ions[c] <- as.character(suban[1,1]) + propmz[c] <- as.numeric(suban[1,3]) + mpc[c] <- paste(suban[,7],collapse=";") + proba[c] <- paste(as.character(suban[,8]),collapse=";") + } + + ## Creation of the data.frame with 1 row per ions + anc <- data.frame(ions,propmz,mpc,proba) + anc <- anc[order(anc[,1]),] + + metaVarFinal <- merge(metaVar, anc, by.x=1, by.y=1, all.x=T, all.y=T) + metaVarFinal <- metaVarFinal[,-1] + #write.table(metaVarFinal,file="res.txt", sep="\t", row.names=F, quote=F) + + return (metaVarFinal) +} + +# RETROCOMPATIBILITE avec ancienne version de annotate +getVariableMetadata = function(xa) { + # --- variableMetadata --- + peakList=getPeaklist(xa) + peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); + variableMetadata=peakList[,!(colnames(peakList) %in% c(sampnames(xa@xcmsSet)))] + variableMetadata$name= paste("M",round(variableMetadata$mz),"T",round(variableMetadata$rt),sep="") + return (variableMetadata) +} diff --git a/galaxy/probmetab/probmetab.r b/galaxy/probmetab/probmetab.r new file mode 100644 index 0000000..063d00e --- /dev/null +++ b/galaxy/probmetab/probmetab.r @@ -0,0 +1,103 @@ +#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file +# probmetab.r version="1.0.0" +# Author: Misharl Monsoor ABIMS TEAM mmonsoor@sb-roscoff.fr + + +# ----- LOG ----- +log_file=file("probmetab.log", open = "wt") +sink(log_file) +sink(log_file, type = "out") + +# ----- PACKAGE ----- +cat("\tPACKAGE INFO\n") +pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "igraph", "xcms","snow","CAMERA","batch","ProbMetab") +for(p in pkgs) { + suppressPackageStartupMessages( stopifnot( library(p, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + cat(p,"\t",as.character(packageVersion(p)),"\n",sep="") +} + +source_local <- function(fname){ + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep="/")) +} +cat("\n\n") +# ----- ARGUMENTS ----- +cat("\tARGUMENTS INFO\n") +listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects +write.table(as.matrix(listArguments), col.names=F, quote=F, sep='\t') + + +# ----- PROCESSING INFILE ----- +cat("\tINFILE PROCESSING INFO\n") + +# ----- INFILE PROCESSING ----- + +if(listArguments[["mode_acquisition"]]=="one") { + load(listArguments[["xa"]]) + #Unzip the chromatograms file for plotting EIC pour the HTML file + if(exists("zipfile")) + { + if (zipfile!=""){ + directory=unzip(zipfile) + } + } + if (!exists("xa")) { + xa=xsAnnotate_object + } + source_local("lib.r") + if (!exists("variableMetadata")) variableMetadata= getVariableMetadata(xa); + +} else if(listArguments[["inputs_mode"]]=="two"){ + load(listArguments[["image_pos"]]) + #Unzip the chromatograms file for plotting EIC pour the HTML file + if(exists("zipfile")) { + if (zipfile!=""){ + directory=unzip(zipfile) + } + } + if (!exists("xa")) { + xa=xsAnnotate_object + } + xaP=xa + source_local("lib.r") + if (!exists("variableMetadata")) variableMetadataP= getVariableMetadata(xa) + else variableMetadataP=variableMetadata + + + load(listArguments[["image_neg"]]) + #Unzip the chromatograms file for plotting EIC pour the HTML file + if(exists("zipfile")) { + + if (zipfile!=""){ + directory=unzip(zipfile) + } + } + if (!exists("xa")) { + xa=xsAnnotate_object + } + xaN=xa + source_local("lib.r") + + if (!exists("variableMetadata")) variableMetadataN= getVariableMetadata(xa) + else variableMetadataN=variableMetadata +} + +#Import the different functions +source_local("lib.r") +source_local("export.class.table-color-graph.R") + +# ----- PROCESSING INFO ----- +cat("\tMAIN PROCESSING INFO\n") + +if(listArguments[["mode_acquisition"]]=="one") { + results=probmetab(xa=xa, variableMetadata=variableMetadata,listArguments=listArguments) +} else if(listArguments[["inputs_mode"]]=="two"){ + results=probmetab(xaP=xaP, xaN=xaN,variableMetadataP=variableMetadataP, variableMetadataN=variableMetadataN, listArguments=listArguments) +} +#delete the parameters to avoid the passage to the next tool in .RData image +#rm(listArguments) +cat("\tDONE\n") +#saving R data in .Rdata file to save the variables used in the present tool +#save.image(paste("probmetab","RData",sep=".")) + diff --git a/galaxy/probmetab/static/images/probmetab_workflow.png b/galaxy/probmetab/static/images/probmetab_workflow.png new file mode 100755 index 0000000..b088957 Binary files /dev/null and b/galaxy/probmetab/static/images/probmetab_workflow.png differ