Skip to content

Commit a0c5345

Browse files
radiator 1.3.0 2024-02-22
* Bug fix using coverage and DArT data
1 parent c92cc45 commit a0c5345

File tree

6 files changed

+88
-32
lines changed

6 files changed

+88
-32
lines changed

DESCRIPTION

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: radiator
22
Type: Package
33
Title: RADseq Data Exploration, Manipulation and Visualization using R
4-
Version: 1.2.9
5-
Date: 2024-01-25
4+
Version: 1.3.0
5+
Date: 2024-02-22
66
Encoding: UTF-8
77
Authors@R: c(
88
person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),
@@ -56,7 +56,7 @@ License: GPL-3
5656
LazyLoad: yes
5757
VignetteBuilder:
5858
knitr
59-
RoxygenNote: 7.2.3
59+
RoxygenNote: 7.3.1
6060
ByteCompile: TRUE
6161
URL: https://thierrygosselin.github.io/radiator/
6262
BugReports: https://github.com/thierrygosselin/radiator/issues

NEWS.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# radiator 1.3.0 2024-02-22
2+
3+
* Bug fix using coverage and DArT data
4+
5+
16
# radiator 1.2.9 2024-01-25
27

38
* Bug fix stemming from genalex files and genind conversion

R/filter_coverage.R

+3-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ filter_coverage <- function(
186186
verbose = verbose)
187187

188188
# Verify that coverage information is present in the data...
189-
depth.info <- check_coverage(gds = data, stacks.haplo.check = TRUE, dart.check = TRUE)
189+
depth.info <- check_coverage(gds = data, genotypes.metadata.check = TRUE, stacks.haplo.check = TRUE, dart.check = TRUE)
190190
if (is.null(depth.info)) {
191191
message("\n\nCoverate information is not available for this dataset, returning GDS...")
192192
return(data)
@@ -204,6 +204,8 @@ filter_coverage <- function(
204204
individuals = FALSE,
205205
markers = TRUE,
206206
missing = FALSE,
207+
coverage = TRUE,
208+
# allele.coverage = TRUE,
207209
allele.coverage = FALSE,
208210
mac = FALSE,
209211
heterozygosity = FALSE,

R/gds.R

+53-23
Original file line numberDiff line numberDiff line change
@@ -967,9 +967,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
967967
# stacks haplotype vcf have the info fields for depth in the VCF header
968968
# but they do not have the info with genotypes...
969969
# it's laziness from stacks...
970+
data.source <- extract_data_source(gds)
970971

971972
if (stacks.haplo.check) {
972-
data.source <- extract_data_source(gds)
973973
biallelic <- radiator::detect_biallelic_markers(data = gds)
974974
biallelic <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(
975975
node = gds, path = "radiator/biallelic", silent = TRUE))
@@ -989,7 +989,10 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
989989
markers.meta.select = c("AVG_COUNT_REF", "AVG_COUNT_SNP"),
990990
whitelist = TRUE
991991
)
992-
if (!is.null(got.coverage)) got.coverage <- c("AVG_COUNT_REF", "AVG_COUNT_SNP")
992+
if (!is.null(got.coverage)) {
993+
got.coverage <- c("AVG_COUNT_REF", "AVG_COUNT_SNP")
994+
return(got.coverage)
995+
}
993996
}#End DART 1row and 2 rows
994997
}
995998

@@ -1006,6 +1009,7 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
10061009
# this part might generate an error if you actually have genotypes metadata...
10071010
# need to run tests...
10081011
got.coverage <- geno.index
1012+
return(got.coverage)
10091013
}
10101014
geno.index <- NULL
10111015
}
@@ -1018,13 +1022,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
10181022
check = "none", verbose = FALSE)$ID
10191023

10201024
if (length(have) > 0) {
1021-
# if (!exhaustive) {
1022-
# want <- c("DP", "CATG")
1023-
# } else {
10241025
want <- c("DP", "AD", "CATG")
1025-
# }
1026-
10271026
got.coverage <- purrr::keep(.x = have, .p = have %in% want)
1027+
return(got.coverage)
10281028
} else {
10291029
got.coverage <- NULL
10301030
}
@@ -2461,7 +2461,7 @@ generate_stats <- function(
24612461
genotypes.metadata.check = TRUE,
24622462
dart.check = TRUE
24632463
)
2464-
if (!"DP" %in% got.coverage) coverage <- FALSE
2464+
if (!"DP" %in% got.coverage && !"READ_DEPTH" %in% got.coverage) coverage <- FALSE
24652465
if (!"AD" %in% got.coverage) allele.coverage <- FALSE
24662466
if (!exhaustive) allele.coverage <- FALSE
24672467
got.coverage <- NULL
@@ -2496,28 +2496,52 @@ generate_stats <- function(
24962496
replacement = c("COVERAGE_TOTAL", "COVERAGE_MEAN", "COVERAGE_MEDIAN", "COVERAGE_IQR"),
24972497
vectorize_all = FALSE
24982498
)
2499+
data.source <- radiator::extract_data_source(gds = gds)
2500+
2501+
if ("dart" %in% data.source) {
2502+
dart.data <- radiator::extract_genotypes_metadata(
2503+
gds = gds,
2504+
genotypes.meta.select = c("M_SEQ", "ID_SEQ", "READ_DEPTH"),
2505+
whitelist = TRUE
2506+
) %>%
2507+
radiator::rad_wide(
2508+
x = .,
2509+
formula = "ID_SEQ ~ M_SEQ",
2510+
values_from = "READ_DEPTH"
2511+
) %>%
2512+
dplyr::select(-ID_SEQ)
2513+
colnames(dart.data) <- NULL
2514+
dart.data <- as.matrix(dart.data)
2515+
} else {
2516+
dart.data <- NULL
2517+
}
24992518

25002519
if (markers) {
2501-
dp_f_m <- function(gds, coverage.stats) {
2502-
2520+
dp_f_m <- function(gds, coverage.stats, dart.data) {
25032521
# Using switch instead was not optimal for additional options in the func...
25042522
if (coverage.stats == "sum") rad_cov_stats <- function(x) round(sum(x, na.rm = TRUE))
25052523
if (coverage.stats == "mean") rad_cov_stats <- function(x) round(mean(x, na.rm = TRUE))
25062524
if (coverage.stats == "median") rad_cov_stats <- function(x) round(stats::median(x, na.rm = TRUE))
25072525
# if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(abs(diff(stats::quantile(x, probs = c(0.25, 0.75), na.rm = TRUE))))
25082526
if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(matrixStats::iqr(x, na.rm = TRUE)) # faster
25092527

2510-
x <- SeqArray::seqApply(
2511-
gdsfile = gds,
2512-
var.name = "annotation/format/DP",
2513-
FUN = rad_cov_stats,
2514-
as.is = "integer",
2515-
margin = "by.variant",
2516-
parallel = TRUE
2517-
)
2528+
if (!is.null(dart.data)) {
2529+
x <- as.integer(apply(X = dart.data, MARGIN = 2, FUN = rad_cov_stats))
2530+
dart.data <- NULL
2531+
} else {
2532+
x <- SeqArray::seqApply(
2533+
gdsfile = gds,
2534+
var.name = "annotation/format/DP",
2535+
FUN = rad_cov_stats,
2536+
as.is = "integer",
2537+
margin = "by.variant",
2538+
parallel = TRUE
2539+
)
2540+
}
2541+
return(x)
25182542
}
25192543

2520-
dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds)
2544+
dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds, dart.data = dart.data)
25212545
}
25222546

25232547
if (individuals) {
@@ -2527,10 +2551,16 @@ generate_stats <- function(
25272551
# Note to myself: the huge speed gain by using other packages robustbse, Rfast, etc.
25282552
# is not worth the unreliability of the results check your testing files...
25292553

2530-
dp.temp <- SeqArray::seqGetData(
2531-
gdsfile = gds,
2532-
var.name = "annotation/format/DP"
2533-
)
2554+
if ("dart" %in% data.source) {
2555+
dp.temp <- dart.data
2556+
dart.data <- NULL
2557+
} else {
2558+
dp.temp <- SeqArray::seqGetData(
2559+
gdsfile = gds,
2560+
var.name = "annotation/format/DP"
2561+
)
2562+
}
2563+
25342564

25352565
dp_f_i <- function(coverage.stats, x) {
25362566
if ("sum" %in% coverage.stats) x <- rowSums(x, na.rm = TRUE)

R/utils.R

+22-3
Original file line numberDiff line numberDiff line change
@@ -977,9 +977,28 @@ strip_rad <- function(
977977
) {
978978
objs <- utils::object.size(x)
979979

980-
# STRATA ----------
981-
strata.n <- intersect(colnames(x), c("STRATA", "POP_ID"))
980+
# Check if ID_SEQ, STRATA_SEQ and M_SEQ present...
981+
if (rlang::has_name(x, "ID_SEQ") && rlang::has_name(x, "INDIVIDUALS")) {
982+
x %<>% dplyr::select(-ID_SEQ)
983+
}
984+
985+
if (rlang::has_name(x, "STRATA_SEQ")) {
986+
strata.n <- intersect(colnames(x), c("STRATA", "POP_ID"))
987+
if (length(strata.n) > 0 && strata.n %in% c("STRATA", "POP_ID")) {
988+
x %<>% dplyr::select(-STRATA_SEQ)
989+
}
990+
}
982991

992+
if (rlang::has_name(x, "M_SEQ")) {
993+
markers.n <- intersect(colnames(x), c("VARIANT_ID", "CHROM", "LOCUS", "POS", "MARKERS"))
994+
if (length(markers.n) > 0) {
995+
x %<>% dplyr::select(-M_SEQ)
996+
}
997+
}
998+
strata.n <- markers.n <- NULL
999+
1000+
1001+
# STRATA ----------
9831002
if (rlang::has_name(x, "POP_ID")) {
9841003
strata <- radiator::generate_strata(data = x, pop.id = TRUE) %>%
9851004
dplyr::mutate(
@@ -1009,7 +1028,7 @@ strip_rad <- function(
10091028
pos = env.arg,
10101029
envir = env.arg
10111030
)
1012-
cm <- keep.strata <- pop.id <- strata.n <- strata <- NULL
1031+
cm <- keep.strata <- pop.id <- strata <- NULL
10131032

10141033
# MARKERS ---------
10151034
x %<>%

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ state and is being actively
77
developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
88
[![minimal R
99
version](https://img.shields.io/badge/R%3E%3D-NA-6666ff.svg)](https://cran.r-project.org/)
10-
[![packageversion](https://img.shields.io/badge/Package%20version-1.2.9-orange.svg)](commits/master)
11-
[![Last-changedate](https://img.shields.io/badge/last%20change-2024--01--25-brightgreen.svg)](/commits/master)
10+
[![packageversion](https://img.shields.io/badge/Package%20version-1.3.0-orange.svg)](commits/master)
11+
[![Last-changedate](https://img.shields.io/badge/last%20change-2024--02--22-brightgreen.svg)](/commits/master)
1212
[![R-CMD-check](https://github.com/thierrygosselin/radiator/workflows/R-CMD-check/badge.svg)](https://github.com/thierrygosselin/radiator/actions)
1313
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3687060.svg)](https://doi.org/10.5281/zenodo.3687060)
1414
<!-- badges: end -->

0 commit comments

Comments
 (0)