Skip to content

Commit

Permalink
Merge pull request #9 from RogerGinBer/devel
Browse files Browse the repository at this point in the history
Update 12/21
  • Loading branch information
RogerGinBer authored Dec 29, 2021
2 parents 5ca8d8c + 0aed33f commit 10189ac
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 175 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Description: RHermes is a broad-scoped targeted metabolomics package to identify
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
VignetteBuilder: knitr
biocViews: MassSpectrometry, Metabolomics, GUI
BugReports: https://github.com/RogerGinBer/RHermes/issues
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(grDevices,colorRamp)
importFrom(grDevices,rgb)
importFrom(keras,array_reshape)
importFrom(keras,k_argmax)
importFrom(keras,load_model_hdf5)
importFrom(keras,predict_classes)
importFrom(stats,IQR)
Expand Down
21 changes: 10 additions & 11 deletions R/IL_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ function(struct, id, rts, minint = Inf) {
#'@param maxInjections Numeric, the maximum number of planned injections to
#' export. Defaults to 9999 to export all of them.
#'@param sepFiles Logical, whether to generate a single csv file or multiple
#' csvs, each corresponding to each injection/chromatographic run. From our
#' experience with an Orbitrap Fusion, separate csvs will simplify the task.
#' csvs, each corresponding to each injection/chromatographic run.
#'@return Nothing. As a side effect, it generates one/multiple .csv files with
#' the inclusion list data
#'@examples
Expand All @@ -79,14 +78,14 @@ function(struct, id, rts, minint = Inf) {
#'@export
setGeneric("exportIL", function(struct, id, file = "./InclusionList",
mode = "both", maxOver = 5, defaultIT = 100,
sepFiles = FALSE, maxInjections = 9999) {
sepFiles = TRUE, maxInjections = 9999) {
standardGeneric("exportIL")
})

#'@rdname exportIL
setMethod("exportIL", c("RHermesExp", "numeric", "ANY", "ANY", "ANY", "ANY"),
function(struct, id, file = "./InclusionList", mode = "both", maxOver = 5,
defaultIT = 100, sepFiles = FALSE, maxInjections = 9999) {
defaultIT = 100, sepFiles = TRUE, maxInjections = 9999) {
validObject(struct)
if (length(struct@data@MS2Exp) == 0) {
stop("This object doesn't have any ILs")
Expand All @@ -96,7 +95,7 @@ function(struct, id, file = "./InclusionList", mode = "both", maxOver = 5,
}
IL <- struct@data@MS2Exp[[id]]@IL@IL
IL$IT <- defaultIT
plan <- injectionPlanner(IL, 10, maxOver, byMaxInt = TRUE,
plan <- injectionPlanner(IL, maxOver = maxOver, byMaxInt = TRUE,
injections = maxInjections,
mode = mode)
if (sepFiles) {
Expand Down Expand Up @@ -130,9 +129,8 @@ function(struct, id, file = "./InclusionList", mode = "both", maxOver = 5,
})


injectionPlanner <- function(IL, injections, maxover, byMaxInt = TRUE,
mode = "continuous", returnAll = FALSE) {
if (returnAll) {injections <- 1e4}
injectionPlanner <- function(IL, injections, maxOver, byMaxInt = TRUE,
mode = "continuous") {
if (byMaxInt) {IL <- IL[order(-IL$MaxInt), ]}
idx <- which(is.na(IL$start) | is.na(IL$end)) #NA depuration
if (length(idx) != 0) {IL <- IL[-idx, ]}
Expand All @@ -157,7 +155,7 @@ injectionPlanner <- function(IL, injections, maxover, byMaxInt = TRUE,
ok_entries <- c()
for (i in seq_len(nrow(IL))) {
timeidx <- which(timeInt >= IL$start[i] & timeInt <= IL$end[i])
if (any(OL[timeidx] >= maxover)) {next}
if (any(OL[timeidx] >= maxOver)) {next}
ok_entries <- c(ok_entries, i)
OL[timeidx] <- OL[timeidx] + 1
}
Expand Down Expand Up @@ -186,7 +184,8 @@ injectionPlanner <- function(IL, injections, maxover, byMaxInt = TRUE,
if(mode == "both" & nrow(deep_IL) != 0){
message(paste(nrow(deep_IL), "high IT scans could not be planned",
"within the continuous MS2 injections.",
"Adding them separately."))
"Adding them separately after injection",
length(plan)))
while (nrow(deep_IL) != 0 & injections > 0) {
timeInt <- seq(min(deep_IL$start, na.rm = TRUE),
max(deep_IL$end, na.rm = TRUE),
Expand Down Expand Up @@ -231,7 +230,7 @@ injectionPlanner <- function(IL, injections, maxover, byMaxInt = TRUE,
if (nrow(IL) != 0) {
message(paste0(nrow(IL), "ILs haven't been added to the injection plan",
" due to lack of space. Try again with more injections,",
" more maxover or returnAll = TRUE"))
" more maxOver or maxInjections"))
}
# print(plot(cumsum(sapply(plan, nrow))))
# hist_data <- lapply(seq_along(plan), function(x){
Expand Down
2 changes: 1 addition & 1 deletion R/Plot_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ function(struct, id, entry, plot = TRUE) {
isoname <- paste0("[", atom, "]", 1)
cur <- df[df$code == isoname, ]
th <- cur$abundance[cur$class == "Theoretical"]
if (th < 30000) {
if (th[1] < 30000) {
return(NA)
}
exp <- cur$abundance[cur$class == "Experimental"]
Expand Down
105 changes: 59 additions & 46 deletions R/SOI_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,15 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) {
if (any(Groups$length > maxlen)) {
Groups <- groupShort(Groups, maxlen)
}

message("Number of scans:")
nscans <- apply(Groups, 1, function(x) {
d <- x["peaks"][[1]]
return(dim(d)[1])
})
Groups$nscans <- nscans
Groups <- Groups[Groups$nscans > 5, ]

## Blank substraction
if (useblank) {
Groups <- blankSubstraction(Groups, blankPL)
Expand Down Expand Up @@ -201,14 +210,6 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) {
Groups$MaxInt <- MaxI
Groups <- Groups[Groups$MaxInt > 0, ]

message("Number of scans:")
nscans <- apply(Groups, 1, function(x) {
d <- x["peaks"][[1]]
return(dim(d)[1])
})
Groups$nscans <- nscans
Groups <- Groups[Groups$nscans > 2, ]

message("Converting from ionic formula to F/A combinations:")
Groups$anot <- lapply(Groups$formula, function(x) {
apply(FA_to_ion[.(x), c(1, 3)], 1, function(y) {
Expand Down Expand Up @@ -378,43 +379,43 @@ parallelGroupShort <- function(i, LG, maxlen){
starts <- pks[,2]
ends <- pks[,3]
known_peak <- rep(TRUE, nrow(pks))
if (nrow(pks) > 1) {
for (i in seq_len(nrow(pks) - 1)) {
if (ends[i] < starts[i + 1]) {
starts <- c(starts, ends[i])
ends <- c(ends, starts[i + 1])
known_peak <- c(known_peak, FALSE)
if (nrow(pks) > 1) {
for (i in seq_len(nrow(pks) - 1)) {
if (ends[i] < starts[i + 1]) {
starts <- c(starts, ends[i])
ends <- c(ends, starts[i + 1])
known_peak <- c(known_peak, FALSE)
}
}
}
}
if (min(ms1data[,1]) < min(starts)) {
ends <- c(ends, min(starts))
starts <- c(starts, min(ms1data[, 1]))
known_peak <- c(known_peak, FALSE)
}
if (max(ms1data[,1]) > max(ends)) {
starts <- c(starts, max(ends))
ends <- c(ends, max(ms1data[, 1]))
known_peak <- c(known_peak, FALSE)
}
if (any(!known_peak)) {
too_long <- ends - starts > maxlen
if (any(too_long & !known_peak)) {
for (i in which(too_long & !known_peak)) {
curstart <- starts[i]
curend <- ends[i]
times <- seq(from = curstart, to = curend,
if (min(ms1data[,1]) < min(starts)) {
ends <- c(ends, min(starts))
starts <- c(starts, min(ms1data[, 1]))
known_peak <- c(known_peak, FALSE)
}
if (max(ms1data[,1]) > max(ends)) {
starts <- c(starts, max(ends))
ends <- c(ends, max(ms1data[, 1]))
known_peak <- c(known_peak, FALSE)
}
if (any(!known_peak)) {
too_long <- ends - starts > maxlen
if (any(too_long & !known_peak)) {
for (i in which(too_long & !known_peak)) {
curstart <- starts[i]
curend <- ends[i]
times <- seq(from = curstart, to = curend,
length.out = ceiling((curend - curstart) / maxlen) + 1)
starts <- c(starts, times[-length(times)])
ends <- c(ends, times[-1])
starts <- c(starts, times[-length(times)])
ends <- c(ends, times[-1])
}
starts <- starts[-which(too_long & !known_peak)]
ends <- ends[-which(too_long & !known_peak)]
}
starts <- starts[-which(too_long & !known_peak)]
ends <- ends[-which(too_long & !known_peak)]
}
}
NewGR <- data.table(start = starts, end = ends,
length = ends - starts, formula = curGR$formula,
peaks = curGR$peaks, mass = curGR$mass)
NewGR <- data.table(start = starts, end = ends,
length = ends - starts, formula = curGR$formula,
peaks = curGR$peaks, mass = curGR$mass)
} else {
#Divide long group into equal-sized smaller groups
times <- seq(from = curGR[1, 1][[1]], to = curGR[1, 2][[1]],
Expand All @@ -433,16 +434,16 @@ parallelGroupShort <- function(i, LG, maxlen){
return(NewGR)
}

#' @importFrom keras k_argmax array_reshape
blankSubstraction <- function(Groups, blankPL){
message("Blank substraction:")
setkeyv(blankPL, c("formv", "rt"))

message("First cleaning")
toKeep <- lapply(seq_len(nrow(Groups)), firstCleaning, Groups, blankPL) %>%
unlist()
sure <- Groups[which(toKeep), ]
if (any(toKeep)) {Groups <- Groups[-which(toKeep),]}
reticulate::py_available(initialize = TRUE)
reticulate::py_available(initialize = TRUE) #Start Python connection
if (reticulate::py_module_available("keras") &
reticulate::py_module_available("tensorflow")) {
model <- load_model_hdf5(system.file("extdata", "ImprovedModel.h5",
Expand All @@ -465,16 +466,28 @@ blankSubstraction <- function(Groups, blankPL){
organizeddata <- keras::array_reshape(organizeddata,
c(nrow(organizeddata), 400),
order = "C") #ANN input
q <- model %>% keras::predict_classes(organizeddata)
q <- model %>% predict(organizeddata) %>% k_argmax()

Groups <- Groups[-which(q == 0), ] #ANN output
Groups <- Groups[-which(q$numpy == 0), ] #ANN output
Groups <- Groups[, -c("MLdata")]

}
} else {
warning(paste("A Keras installation was not found and ANN blank",
"substraction was not performed"))
setkeyv(blankPL, "formv")
message("Preparing input for cosine similarity")
RES <- lapply(seq_len(nrow(Groups)), prepareNetInput, Groups, blankPL)
cosines <- sapply(RES, function(x){
if(is.na(x)[1]){return(Inf)}
philentropy::cosine_dist(x[1,] + 1e-12, x[2,] + 1e-12,
testNA = FALSE)
})
Groups <- Groups[cosines < 0.8, ]
warning("A Keras installation was not found and ANN blank ",
"subtraction was not performed. A less-accurate ",
"cosine similarity score was applied instead.")
}
Groups <- rbind(sure, Groups)
return(Groups)
}

#'@importFrom stats IQR quantile
Expand Down
Loading

0 comments on commit 10189ac

Please sign in to comment.