Skip to content

Commit

Permalink
feat: add featureArea for XcmsExperimentHdf5
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed Nov 25, 2024
1 parent 365042c commit ad0650d
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 45 deletions.
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ export(
"hasFilledChromPeaks",
"plotAdjustedRtime",
"groupOverlaps",
"featureArea",
"loadXcmsData",
"matchLamasChromPeaks",
"summarizeLamaMatch",
Expand Down Expand Up @@ -481,6 +480,7 @@ exportMethods("hasChromPeaks",
"adjustedRtime",
"adjustedRtime<-",
"adjustRtimePeakGroups",
"featureArea",
"featureDefinitions",
"featureDefinitions<-",
"featureValues",
Expand Down Expand Up @@ -607,4 +607,4 @@ export("PercentMissingFilter")
export("BlankFlag")

## HDF5 storage mode
exportClasses("XcmsExperimentHdf5")
exportClasses("XcmsExperimentHdf5")
3 changes: 2 additions & 1 deletion R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ setGeneric("factorGap", function(object) standardGeneric("factorGap"))
setGeneric("factorGap<-", function(object, value) standardGeneric("factorGap<-"))
setGeneric("family", function(object, ...) standardGeneric("family"))
setGeneric("family<-", function(object, value) standardGeneric("family<-"))
setGeneric("featureArea", function(object, ...) standardGeneric("featureArea"))

#' @title Extract ion chromatograms for each feature
#'
Expand Down Expand Up @@ -2144,4 +2145,4 @@ setGeneric("write.mzdata", function(object, ...) standardGeneric("write.mzdata")
setGeneric("write.mzQuantML", function(object, ...) standardGeneric("write.mzQuantML"))

## X
setGeneric("xcmsSource", function(object, ...) standardGeneric("xcmsSource"))
setGeneric("xcmsSource", function(object, ...) standardGeneric("xcmsSource"))
14 changes: 1 addition & 13 deletions R/XcmsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -982,18 +982,6 @@
chrs
}

#' @rdname XcmsExperiment
featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min,
rtmax = max, features = character()) {
if (!hasFeatures(object))
stop("No correspondence results available. Please run ",
"'groupChromPeaks' first.")
if (!length(features))
features <- rownames(featureDefinitions(object))
.features_ms_region(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin,
rtmax = rtmax, features = features)
}

#' @title Define MS regions for features
#'
#' @param x `XcmsExperiment` or `XCMSnExp`.
Expand Down Expand Up @@ -1266,4 +1254,4 @@ XcmsExperiment <- function() {
n@.processHistory <- from@processHistory
validObject(n)
n
}
}
16 changes: 15 additions & 1 deletion R/XcmsExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -1548,6 +1548,20 @@ setMethod(
else as.logical(nrow(object@featureDefinitions))
})

#' @rdname XcmsExperiment
setMethod(
"featureArea", "XcmsResult",
function(object, mzmin = min, mzmax = max, rtmin = min,
rtmax = max, features = character()) {
if (!hasFeatures(object))
stop("No correspondence results available. Please run ",
"'groupChromPeaks' first.")
if (!length(features))
features <- rownames(featureDefinitions(object))
.features_ms_region(object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin,
rtmax = rtmax, features = features)
})

#' @rdname XcmsExperiment
setReplaceMethod("featureDefinitions", "XcmsExperiment",
function(object, value) {
Expand Down Expand Up @@ -2072,4 +2086,4 @@ setMethod(
res <- do.call(rbind, res)
pb$tick()
res
})
})
91 changes: 67 additions & 24 deletions R/XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ NULL
}
if (!keepChromPeaks && hasChromPeaks(x)) {
x@chrom_peaks_ms_level <- integer()
x@gap_peaks_ms_level <- integer()
drop <- c(drop, .PROCSTEP.PEAK.DETECTION, .PROCSTEP.PEAK.FILLING,
.PROCSTEP.CALIBRATION, .PROCSTEP.PEAK.REFINEMENT)
}
Expand Down Expand Up @@ -566,7 +567,15 @@ NULL
}

#' Returns the rtmin, rtmax, mzmin and mzmax for each feature depending on the
#' associated chrom peaks.
#' associated chrom peaks. For XcmsExperimentHdf5 we have to:
#' - loop over each sample
#' - load the mapping of features to chrom peaks
#' - load the chrom peak data
#' - extract the required information per feature
#' - after the loop: aggregate the information.
#'
#' We could also use `featureValues()`, but would need to call that 4 times,
#' i.e. load the data 4 times.
#'
#' For the final calculation of the region boundaries using the functions
#' defined with parameters `mzmin`, `mzmax`, `rtmin` and `rtmax` we consider
Expand All @@ -575,39 +584,73 @@ NULL
#' in each sample (in contrast to the `.features_ms_region()` function that
#' considered all values for all chrom peaks of a feature).
#'
#' @param features `character` with the feature IDs.
#'
#' @noRd
.h5_features_ms_region <- function(x, mzmin, mzmax, rtmin, rtmax, feature_idx,
.h5_features_ms_region <- function(x, mzmin, mzmax, rtmin, rtmax, features,
ms_level = 1L) {
## - Get for each feature_idx, each sample the min rtmin, mzmin and max
## rtmax, mzmax.
## - add the values to a `matrix` or a `list`? what is more efficient?
## - calculate the boundaries using these.
## need to get for each feature all values to select min/max/median or
## whatever.
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
"total (:percent) in ",
":elapsed"),
total = length(x@sample_id) + 1L, clear = FALSE)
pb$tick(0)
fids <- rhdf5::h5read(x@hdf5_file,
paste0("/features/ms_", ms_level,
"/feature_definitions_rownames"), drop = TRUE)
feature_idx <- unique(match(features, fids))
if (anyNA(feature_idx))
stop("Some of the provided feature IDs were not found in the",
" data set.", call. = FALSE)
feature_idx <- sort(feature_idx)
cn <- .h5_chrom_peaks_colnames(x, ms_level)
cn_idx <- match(c("mzmin", "mzmax", "rtmin", "rtmax"), cn)
template <- rep(NA_real_, length(feature_idx))
rtmin_mat <- rtmax_mat <- mzmin_mat <- mzmax_mat <-
vector("list", length(x@sample_id))
for (i in seq_along(x@sample_id)) {
sample_id <- x@sample_id[i]
sid <- paste0("/", sample_id, "/ms_", ms_level)
fidx <- xcms:::.h5_read_data(x@hdf5_file, sample_id,
fidx <- .h5_read_data(x@hdf5_file, sample_id,
name = "feature_to_chrom_peaks",
ms_level = ms_level)[[1L]]
fidx <- fidx[fidx[, 1L] %in% feature_ids, ]

vals <- .h5_read_data(hdf5_file, sample_id, name = "chrom_peaks",
ms_level = ms_level, j = col_idx)[[1L]]
LLLLLL
}
}

a <- function() {
m <- matrix(NA_real_, ncol = 1000, nrow = 2000)
for (i in 1:1000) {
fidx <- fidx[fidx[, 1L] %in% feature_idx, , drop = FALSE]
if (nrow(fidx)) {
vals <- .h5_read_data(
x@hdf5_file, sample_id, name = "chrom_peaks",
ms_level = ms_level, i = fidx[, 2L], j = cn_idx,
read_colnames = FALSE, read_rownames = FALSE)[[1L]]
idx <- as.factor(match(fidx[, 1L], feature_idx))
mzmin_mat[[i]] <- .h5_features_ms_region_values(
template, vals[, 1L], idx, dups = base::min)
mzmax_mat[[i]] <- .h5_features_ms_region_values(
template, vals[, 2L], idx, dups = base::max)
rtmin_mat[[i]] <- .h5_features_ms_region_values(
template, vals[, 3L], idx, dups = base::min)
rtmax_mat[[i]] <- .h5_features_ms_region_values(
template, vals[, 4L], idx, dups = base::max)
} else
mzmin_mat[[i]] <- mzmax_mat[[i]] <- rtmin_mat[[i]] <-
rtmax_mat[[i]] <- template
pb$tick()
}
mzmin_mat <- do.call(base::cbind, mzmin_mat)
mzmax_mat <- do.call(base::cbind, mzmax_mat)
rtmin_mat <- do.call(base::cbind, rtmin_mat)
rtmax_mat <- do.call(base::cbind, rtmax_mat)
res <- cbind(mzmin = apply(mzmin_mat, 1L, mzmin, na.rm = TRUE),
mzmax = apply(mzmax_mat, 1L, mzmax, na.rm = TRUE),
rtmin = apply(rtmin_mat, 1L, rtmin, na.rm = TRUE),
rtmax = apply(rtmax_mat, 1L, rtmax, na.rm = TRUE))
rownames(res) <- fids[feature_idx]
res[features, , drop = FALSE]
}

b <- function() {
l <- vector("list", 1000)
for (i in 1:1000) {
}
.h5_features_ms_region_values <- function(x, vals, map, dups = base::min) {
vl <- base::split(vals, map)
l <- lengths(vl) > 1L
x[as.integer(names(vl[!l]))] <- unlist(vl[!l], FALSE, FALSE)
x[as.integer(names(vl[l]))] <- vapply(vl[l], dups, x[1L], USE.NAMES = FALSE)
x
}

################################################################################
Expand Down
28 changes: 25 additions & 3 deletions R/XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -426,12 +426,16 @@ setMethod(
## Identify for each feature the samples in which there is a missing
## value
fvals <- is.na(featureValues(object, msLevel = msLevel, method = "sum"))
keep <- rowSums(fvals) > 0
fidx <- seq_len(nrow(fvals))[keep]
fidx <- which(rowSums(fvals) > 0)
fvals <- fvals[keep, , drop = FALSE]
## Define the feature region to integrate signal from. Need to iterate
## over all samples/files.

message("Defining MS area to integrate signal from")
fr <- .h5_features_ms_region(
object, mzmin = param@mzmin, mzmax = param@mzmax,
rtmin = param@rtmin, rtmax = param@rtmax, feature_idx = fidx,
ms_level = msLevel)
## LLLLLL

feature_ids <- rownames(featureDefinitions(object, msLevel = msLevel))
fr <- .features_ms_region(object, mzmin = param@mzmin,
Expand Down Expand Up @@ -740,6 +744,24 @@ setMethod(
object
})

#' @rdname hidden_aliases
setMethod(
"featureArea", "XcmsExperimentHdf5",
function(object, mzmin = min, mzmax = max, rtmin = min,
rtmax = max, features = character(), msLevel = 1L) {
if (!hasFeatures(object, msLevel))
stop("No correspondence results available. Please run ",
"'groupChromPeaks' first.", call. = FALSE)
if (!length(features))
features <- rhdf5::h5read(object@hdf5_file,
paste0("/features/ms_", msLevel,
"/feature_definitions_rownames"),
drop = TRUE)
.h5_features_ms_region(
object, mzmin = mzmin, mzmax = mzmax, rtmin = rtmin,
rtmax = rtmax, features, ms_level = msLevel)
})

#' @rdname hidden_aliases
setReplaceMethod("featureDefinitions", "XcmsExperimentHdf5",
function(object, value) {
Expand Down
49 changes: 48 additions & 1 deletion tests/testthat/test_XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -763,4 +763,51 @@ test_that(".h5_x_chromatogram works", {
fts <- featureDefinitions(res)
})

rm(h5f_full_g)
test_that(".h5_features_ms_region_values works", {
tmpl <- rep(NA_real_, 20)
v <- as.numeric(1:14)
m <- c(2, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 10, 11)
res <- .h5_features_ms_region_values(tmpl, v, m)
expect_true(is.na(res[1L]))
expect_true(all(is.na(res[12:20])))
expect_equal(res[2:11], c(1, 3, 4, 5, 6, 7, 8, 10, 13, 14))
res <- .h5_features_ms_region_values(tmpl, v, m, max)
expect_true(is.na(res[1L]))
expect_true(all(is.na(res[12:20])))
expect_equal(res[2:11], c(2, 3, 4, 5, 6, 7, 9, 12, 13, 14))
})

test_that(".h5_features_ms_region works", {
res <- xcms:::.h5_features_ms_region(
xmseg_full_h5, mzmin = min, mzmax = max, rtmin = min, rtmax = max,
features = rownames(featureDefinitions(xmseg_full_h5)),
ms_level = 1L)
expect_true(is.matrix(res))
expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax"))
expect_equal(nrow(res), nrow(featureDefinitions(xmseg_full_h5)))
expect_equal(rownames(res), rownames(featureDefinitions(xmseg_full_h5)))

res_sub <- xcms:::.h5_features_ms_region(
xmseg_full_h5, mzmin = min, mzmax = max, rtmin = min, rtmax = max,
features = rownames(res)[c(4, 10, 12)], ms_level = 1L)
expect_true(is.matrix(res_sub))
expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax"))
expect_equal(nrow(res_sub), 3)
expect_equal(res[c(4, 10, 12), ], res_sub)

res_sub <- xcms:::.h5_features_ms_region(
xmseg_full_h5, mzmin = min, mzmax = max, rtmin = min, rtmax = max,
features = rownames(res)[c(10, 12, 10, 4)], ms_level = 1L)
expect_true(is.matrix(res_sub))
expect_equal(colnames(res), c("mzmin", "mzmax", "rtmin", "rtmax"))
expect_equal(nrow(res_sub), 4)
expect_equal(res[c(10, 12, 10, 4), ], res_sub)

## errors
expect_error(xcms:::.h5_features_ms_region(
xmseg_full_h5, mzmin = min, mzmax = max, rtmin = min, rtmax = max,
features = c(rownames(res)[c(10, 12, 10, 4)], "a"), ms_level = 1L),
"not found")
})

rm(h5f_full_g)
13 changes: 13 additions & 0 deletions tests/testthat/test_XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,19 @@ test_that("fillChromPeaks,XcmsExperimentHdf5 works", {
"No feature definitions")
})

test_that("featureArea,XcmsExperimentHdf5 works", {
expect_error(featureArea(xmse_h5), "No correspondence")
expect_error(featureArea(xmseg_full_h5, msLevel = 2L), "No correspondence")
expect_error(featureArea(xmseg_full_h5, features = c("a", "b")),
"Some of the provided")
res <- featureArea(xmseg_full_h5)
ref <- featureArea(xmseg_full_ref)
expect_equal(unname(res), unname(ref))
res <- featureArea(xmseg_full_h5, features = rownames(res)[c(5, 12, 20)])
ref <- featureArea(xmseg_full_ref, features = rownames(ref)[c(5, 12, 20)])
expect_equal(unname(res), unname(ref))
})

## test_that(".h5_feature_chrom_peaks_sample works", {
## cn <- .h5_chrom_peaks_colnames(xmseg_full_h5, 1L)
## res <- .h5_feature_chrom_peaks_sample("S3", xmseg_full_h5@hdf5_file,
Expand Down

0 comments on commit ad0650d

Please sign in to comment.