@@ -967,9 +967,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
967
967
# stacks haplotype vcf have the info fields for depth in the VCF header
968
968
# but they do not have the info with genotypes...
969
969
# it's laziness from stacks...
970
+ data.source <- extract_data_source(gds )
970
971
971
972
if (stacks.haplo.check ) {
972
- data.source <- extract_data_source(gds )
973
973
biallelic <- radiator :: detect_biallelic_markers(data = gds )
974
974
biallelic <- gdsfmt :: read.gdsn(gdsfmt :: index.gdsn(
975
975
node = gds , path = " radiator/biallelic" , silent = TRUE ))
@@ -989,7 +989,10 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
989
989
markers.meta.select = c(" AVG_COUNT_REF" , " AVG_COUNT_SNP" ),
990
990
whitelist = TRUE
991
991
)
992
- if (! is.null(got.coverage )) got.coverage <- c(" AVG_COUNT_REF" , " AVG_COUNT_SNP" )
992
+ if (! is.null(got.coverage )) {
993
+ got.coverage <- c(" AVG_COUNT_REF" , " AVG_COUNT_SNP" )
994
+ return (got.coverage )
995
+ }
993
996
}# End DART 1row and 2 rows
994
997
}
995
998
@@ -1006,6 +1009,7 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
1006
1009
# this part might generate an error if you actually have genotypes metadata...
1007
1010
# need to run tests...
1008
1011
got.coverage <- geno.index
1012
+ return (got.coverage )
1009
1013
}
1010
1014
geno.index <- NULL
1011
1015
}
@@ -1018,13 +1022,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c
1018
1022
check = " none" , verbose = FALSE )$ ID
1019
1023
1020
1024
if (length(have ) > 0 ) {
1021
- # if (!exhaustive) {
1022
- # want <- c("DP", "CATG")
1023
- # } else {
1024
1025
want <- c(" DP" , " AD" , " CATG" )
1025
- # }
1026
-
1027
1026
got.coverage <- purrr :: keep(.x = have , .p = have %in% want )
1027
+ return (got.coverage )
1028
1028
} else {
1029
1029
got.coverage <- NULL
1030
1030
}
@@ -2461,7 +2461,7 @@ generate_stats <- function(
2461
2461
genotypes.metadata.check = TRUE ,
2462
2462
dart.check = TRUE
2463
2463
)
2464
- if (! " DP" %in% got.coverage ) coverage <- FALSE
2464
+ if (! " DP" %in% got.coverage && ! " READ_DEPTH " %in% got.coverage ) coverage <- FALSE
2465
2465
if (! " AD" %in% got.coverage ) allele.coverage <- FALSE
2466
2466
if (! exhaustive ) allele.coverage <- FALSE
2467
2467
got.coverage <- NULL
@@ -2496,28 +2496,52 @@ generate_stats <- function(
2496
2496
replacement = c(" COVERAGE_TOTAL" , " COVERAGE_MEAN" , " COVERAGE_MEDIAN" , " COVERAGE_IQR" ),
2497
2497
vectorize_all = FALSE
2498
2498
)
2499
+ data.source <- radiator :: extract_data_source(gds = gds )
2500
+
2501
+ if (" dart" %in% data.source ) {
2502
+ dart.data <- radiator :: extract_genotypes_metadata(
2503
+ gds = gds ,
2504
+ genotypes.meta.select = c(" M_SEQ" , " ID_SEQ" , " READ_DEPTH" ),
2505
+ whitelist = TRUE
2506
+ ) %> %
2507
+ radiator :: rad_wide(
2508
+ x = . ,
2509
+ formula = " ID_SEQ ~ M_SEQ" ,
2510
+ values_from = " READ_DEPTH"
2511
+ ) %> %
2512
+ dplyr :: select(- ID_SEQ )
2513
+ colnames(dart.data ) <- NULL
2514
+ dart.data <- as.matrix(dart.data )
2515
+ } else {
2516
+ dart.data <- NULL
2517
+ }
2499
2518
2500
2519
if (markers ) {
2501
- dp_f_m <- function (gds , coverage.stats ) {
2502
-
2520
+ dp_f_m <- function (gds , coverage.stats , dart.data ) {
2503
2521
# Using switch instead was not optimal for additional options in the func...
2504
2522
if (coverage.stats == " sum" ) rad_cov_stats <- function (x ) round(sum(x , na.rm = TRUE ))
2505
2523
if (coverage.stats == " mean" ) rad_cov_stats <- function (x ) round(mean(x , na.rm = TRUE ))
2506
2524
if (coverage.stats == " median" ) rad_cov_stats <- function (x ) round(stats :: median(x , na.rm = TRUE ))
2507
2525
# if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(abs(diff(stats::quantile(x, probs = c(0.25, 0.75), na.rm = TRUE))))
2508
2526
if (coverage.stats == " iqr" ) rad_cov_stats <- function (x ) round(matrixStats :: iqr(x , na.rm = TRUE )) # faster
2509
2527
2510
- x <- SeqArray :: seqApply(
2511
- gdsfile = gds ,
2512
- var.name = " annotation/format/DP" ,
2513
- FUN = rad_cov_stats ,
2514
- as.is = " integer" ,
2515
- margin = " by.variant" ,
2516
- parallel = TRUE
2517
- )
2528
+ if (! is.null(dart.data )) {
2529
+ x <- as.integer(apply(X = dart.data , MARGIN = 2 , FUN = rad_cov_stats ))
2530
+ dart.data <- NULL
2531
+ } else {
2532
+ x <- SeqArray :: seqApply(
2533
+ gdsfile = gds ,
2534
+ var.name = " annotation/format/DP" ,
2535
+ FUN = rad_cov_stats ,
2536
+ as.is = " integer" ,
2537
+ margin = " by.variant" ,
2538
+ parallel = TRUE
2539
+ )
2540
+ }
2541
+ return (x )
2518
2542
}
2519
2543
2520
- dp.m <- purrr :: map_dfc(.x = coverage.stats.l , .f = dp_f_m , gds = gds )
2544
+ dp.m <- purrr :: map_dfc(.x = coverage.stats.l , .f = dp_f_m , gds = gds , dart.data = dart.data )
2521
2545
}
2522
2546
2523
2547
if (individuals ) {
@@ -2527,10 +2551,16 @@ generate_stats <- function(
2527
2551
# Note to myself: the huge speed gain by using other packages robustbse, Rfast, etc.
2528
2552
# is not worth the unreliability of the results check your testing files...
2529
2553
2530
- dp.temp <- SeqArray :: seqGetData(
2531
- gdsfile = gds ,
2532
- var.name = " annotation/format/DP"
2533
- )
2554
+ if (" dart" %in% data.source ) {
2555
+ dp.temp <- dart.data
2556
+ dart.data <- NULL
2557
+ } else {
2558
+ dp.temp <- SeqArray :: seqGetData(
2559
+ gdsfile = gds ,
2560
+ var.name = " annotation/format/DP"
2561
+ )
2562
+ }
2563
+
2534
2564
2535
2565
dp_f_i <- function (coverage.stats , x ) {
2536
2566
if (" sum" %in% coverage.stats ) x <- rowSums(x , na.rm = TRUE )
0 commit comments