Skip to content

Commit 1ade09d

Browse files
# radiator v.0.0.16 2018-09-04
* `tidy_vcf`, `tidy_genomic_data` and `genomic_converter`: way faster with huge VCF
* `write_fineradstructure`: fix bug when data was from DArT
1 parent 148bc67 commit 1ade09d

19 files changed

+1007
-679
lines changed

DESCRIPTION

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: radiator
22
Type: Package
33
Title: RADseq Data Exploration, Manipulation and Visualization using R
4-
Version: 0.0.15
5-
Date: 2018-08-17
4+
Version: 0.0.16
5+
Date: 2018-09-04
66
Encoding: UTF-8
77
Authors@R: c(
88
person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),
@@ -12,7 +12,7 @@ Authors@R: c(
1212
Maintainer: Thierry Gosselin <[email protected]>
1313
Description: radiator: an R package for RADseq Data Exploration, Manipulation and Visualization.
1414
Depends:
15-
R (>= 3.4.0)
15+
R (>= 3.5.0)
1616
Imports:
1717
amap,
1818
broom,

NAMESPACE

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ export(keep_common_markers)
7070
export(mclapply_win)
7171
export(merge_dart)
7272
export(merge_vcf)
73-
export(parse_genomic)
73+
export(parse_gds_metadata)
7474
export(pi)
7575
export(plot_bayescan)
7676
export(plot_boxplot_coverage)

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# radiator v.0.0.16 2018-09-04
2+
3+
* `tidy_vcf`, `tidy_genomic_data` and `genomic_converter`: way faster with huge VCF
4+
* `write_fineradstructure`: fix bug when data was from DArT
5+
6+
17
# radiator v.0.0.15 2018-08-17
28

39
* `genomic_converter`, `tidy_genomic_data`: bug fix when individuals are integers

R/change_alleles.R

+38-2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
#' during execution.
2828
#' Default: \code{verbose = FALSE}.
2929

30+
#' @param ... (optional) To pass further arguments for fine-tuning the tidying
31+
#' (details below).
32+
33+
3034
#' @return
3135
#' Depending on whether the input file is biallelic or multiallelic,
3236
#' the function will output, in addition to the REF and ALT columns, several genotype codings:
@@ -65,12 +69,19 @@ change_alleles <- function(
6569
data,
6670
biallelic = NULL,
6771
parallel.core = parallel::detectCores() - 1,
68-
verbose = FALSE) {
72+
verbose = FALSE,
73+
...
74+
) {
6975

7076
# test
7177
# biallelic = NULL
7278
# parallel.core = parallel::detectCores() - 1
7379
# verbose = TRUE
80+
# gt.vcf.nuc <- TRUE
81+
# gt.vcf <- TRUE
82+
# gt <- TRUE
83+
# gt.bin <- TRUE
84+
7485

7586
# Checking for missing and/or default arguments ------------------------------
7687
if (missing(data)) stop("Input file missing")
@@ -80,6 +91,31 @@ change_alleles <- function(
8091
data <- dplyr::rename(.data = data, MARKERS = LOCUS)
8192
}
8293

94+
# dotslist -------------------------------------------------------------------
95+
dotslist <- list(...)
96+
want <- c("gt.vcf.nuc", "gt.vcf", "gt", "gt.bin")
97+
unknowned_param <- setdiff(names(dotslist), want)
98+
99+
if (length(unknowned_param) > 0) {
100+
stop("Unknowned \"...\" parameters ",
101+
stringi::stri_join(unknowned_param, collapse = " "))
102+
}
103+
104+
radiator.dots <- dotslist[names(dotslist) %in% want]
105+
gt.vcf.nuc <- radiator.dots[["gt.vcf.nuc"]]
106+
gt.vcf <- radiator.dots[["gt.vcf"]]
107+
gt <- radiator.dots[["gt"]]
108+
gt.bin <- radiator.dots[["gt.bin"]]
109+
110+
if (is.null(gt.vcf.nuc)) gt.vcf.nuc <- TRUE
111+
if (is.null(gt.vcf)) gt.vcf <- TRUE
112+
if (is.null(gt)) gt <- TRUE
113+
if (is.null(gt.bin)) gt.bin <- TRUE
114+
115+
if (!gt.vcf.nuc && !gt) {
116+
stop("At least one of gt.vcf.nuc or gt must be TRUE")
117+
}
118+
83119
# get number of markers
84120
n.catalog.locus <- dplyr::n_distinct(data$MARKERS)
85121

@@ -160,7 +196,7 @@ change_alleles <- function(
160196
inversion <- FALSE
161197
}
162198
old.ref <- NULL
163-
message(" number of markers with REF/ALT change(s) = ", nrow(change.ref))
199+
message("\nNumber of markers with REF/ALT change(s) = ", nrow(change.ref))
164200
} else {
165201
inversion <- FALSE
166202
}

R/filter_rad.R

+9-1
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,15 @@ filter_rad <- function(
302302
pop.select = pop.select,
303303
blacklist.id = blacklist.id,
304304
parallel.core = parallel.core,
305-
verbose = FALSE)
305+
verbose = FALSE,
306+
vcf.stats = TRUE,
307+
snp.read.position.filter = NULL,
308+
mac.threshold = NULL,
309+
gt.vcf.nuc = TRUE,
310+
gt.vcf = TRUE,
311+
gt = TRUE,
312+
gt.bin = TRUE,
313+
keep.gds = FALSE)
306314

307315

308316
# Keep GT_BIN

R/genomic_converter.R

+11-2
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ genomic_converter <- function(
237237
verbose = TRUE,
238238
...
239239
) {
240+
240241
if (verbose) {
241242
cat("#######################################################################\n")
242243
cat("##################### radiator::genomic_converter #####################\n")
@@ -427,7 +428,15 @@ devtools::install_github('ericarcher/strataG', build_vignettes = TRUE)")
427428
pop.select = pop.select,
428429
filename = NULL,
429430
verbose = FALSE,
430-
keep.allele.names = keep.allele.names
431+
keep.allele.names = keep.allele.names,
432+
vcf.stats = TRUE,
433+
snp.read.position.filter = NULL,
434+
mac.threshold = NULL,
435+
gt.vcf.nuc = TRUE,
436+
gt.vcf = TRUE,
437+
gt = TRUE,
438+
gt.bin = TRUE,
439+
keep.gds = FALSE
431440
)
432441

433442
if(verbose) message("\nPreparing data for output\n")
@@ -473,7 +482,7 @@ devtools::install_github('ericarcher/strataG', build_vignettes = TRUE)")
473482
vectorize_all = FALSE
474483
)
475484
} else {
476-
message("IMPORTANT: you have > 20 000 markers (", marker.number, ")",
485+
message("\nIMPORTANT: you have > 20 000 markers (", marker.number, ")",
477486
"\nDo you want the more suitable genlight object instead of the current genind? (y/n):")
478487
overide.genind <- as.character(readLines(n = 1))
479488
if (overide.genind == "y") {

R/global_variables.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ if (getRversion() >= "2.15.1") {
8888
"VALUES", "TOTAL_READ_COUNTS", "aswer.opt", "markers.meta", "vcf.connection",
8989
"ALT_COUNT", "INDIVIDUALS_VCF", "MAC", "MAC_FILTER", "REF_COUNT",
9090
"SNP_PER_LOCUS_MAC", "SNP_POS_READ_IQR", "SNP_POS_READ_OUTLIERS",
91-
"SNP_POS_READ_Q75", "VARIANT_ID"
91+
"SNP_POS_READ_Q75", "VARIANT_ID", "genotypes", "NEW_POP", "NEW_INDIVIDUALS",
92+
"biallelic"
9293
)
9394
)
9495
}

R/summary_strata.R

+11-1
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,22 @@ read_strata <- function(strata, pop.id = FALSE,
5959
trim_ws = TRUE))
6060
}
6161
blacklist.id$INDIVIDUALS <- clean_ind_names(blacklist.id$INDIVIDUALS)
62+
63+
64+
# remove potential duplicate id
65+
dup <- dplyr::distinct(.data = blacklist.id, INDIVIDUALS)
66+
blacklist.id.dup <- nrow(blacklist.id) - nrow(dup)
67+
if (blacklist.id.dup >1) {
68+
message("Duplicate id's in blacklist: ", blacklist.id.dup)
69+
blacklist.id <- dup
70+
}
71+
dup <- blacklist.id.dup <- NULL
6272
n.ind.blacklist <- length(blacklist.id$INDIVIDUALS)
6373
if (verbose) message("\nNumber of individuals in blacklist: ", n.ind.blacklist, " ind.")
6474
n.ind.blacklisted <- length(strata$INDIVIDUALS %in% blacklist.id$INDIVIDUALS)
6575
strata <- dplyr::filter(strata, !INDIVIDUALS %in% blacklist.id$INDIVIDUALS)
6676
if (verbose) message("\nBlacklisted individuals: ", n.ind.blacklisted, " ind.")
67-
}
77+
}
6878

6979

7080
# manage levels, labels and pop.select ---------------------------------------

R/tidy_genomic_data.R

+51-9
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,9 @@ tidy_genomic_data <- function(
407407

408408
# dotslist -------------------------------------------------------------------
409409
dotslist <- list(...)
410-
want <- c("keep.allele.names")
410+
want <- c("keep.allele.names", "snp.read.position.filter", "mac.threshold",
411+
"ref.calibration", "gt.vcf.nuc", "gt.vcf", "gt", "gt.bin", "vcf.stats",
412+
"keep.gds")
411413
unknowned_param <- setdiff(names(dotslist), want)
412414

413415
if (length(unknowned_param) > 0) {
@@ -419,7 +421,36 @@ tidy_genomic_data <- function(
419421
keep.allele.names <- radiator.dots[["keep.allele.names"]]
420422

421423
if (is.null(keep.allele.names)) keep.allele.names <- FALSE
424+
snp.read.position.filter <- radiator.dots[["snp.read.position.filter"]]
425+
mac.threshold <- radiator.dots[["mac.threshold"]]
426+
ref.calibration <- radiator.dots[["ref.calibration"]]
427+
gt.vcf.nuc <- radiator.dots[["gt.vcf.nuc"]]
428+
gt.vcf <- radiator.dots[["gt.vcf"]]
429+
gt <- radiator.dots[["gt"]]
430+
gt.bin <- radiator.dots[["gt.bin"]]
431+
vcf.stats <- radiator.dots[["vcf.stats"]]
432+
filename <- radiator.dots[["filename"]]
433+
keep.gds <- radiator.dots[["keep.gds"]]
434+
435+
if (is.null(keep.gds)) keep.gds <- TRUE
436+
if (is.null(vcf.stats)) vcf.stats <- TRUE
437+
if (is.null(ref.calibration)) ref.calibration <- FALSE
438+
if (is.null(gt.vcf.nuc)) gt.vcf.nuc <- TRUE
439+
if (is.null(gt.vcf)) gt.vcf <- TRUE
440+
if (is.null(gt)) gt <- TRUE
441+
if (is.null(gt.bin)) gt.bin <- TRUE
442+
443+
444+
if (!gt.vcf.nuc && !gt) {
445+
stop("At least one of gt.vcf.nuc or gt must be TRUE")
446+
}
422447

448+
if (!is.null(snp.read.position.filter)) {
449+
snp.read.position.filter <- match.arg(
450+
arg = snp.read.position.filter,
451+
choices = c("outliers", "iqr", "q75"),
452+
several.ok = TRUE)
453+
}
423454

424455
# File type detection----------------------------------------------------------
425456
skip.tidy.wide <- FALSE # initiate for data frame below
@@ -526,8 +557,19 @@ tidy_genomic_data <- function(
526557
blacklist.id = blacklist.id,
527558
pop.select = pop.select,
528559
pop.levels = pop.levels,
529-
pop.labels = pop.labels
530-
)
560+
pop.labels = pop.labels,
561+
filename = NULL,
562+
vcf.stats = TRUE,
563+
snp.read.position.filter = NULL,
564+
mac.threshold = NULL,
565+
gt.vcf.nuc = TRUE,
566+
gt.vcf = TRUE,
567+
gt = TRUE,
568+
gt.bin = TRUE,
569+
keep.gds = FALSE
570+
) %$%
571+
genotypes
572+
531573
biallelic <- radiator::detect_biallelic_markers(input)
532574
} # End import VCF
533575

@@ -1167,12 +1209,12 @@ tidy_genomic_data <- function(
11671209

11681210
# Minor Allele Frequency filter ----------------------------------------------
11691211
if (!is.null(maf.thresholds)) { # with MAF
1170-
input <- radiator::filter_maf(
1171-
data = input,
1172-
interactive.filter = FALSE,
1173-
maf.thresholds = maf.thresholds,
1174-
parallel.core = parallel.core,
1175-
verbose = FALSE)$tidy.filtered.maf
1212+
input <- radiator::filter_maf(
1213+
data = input,
1214+
interactive.filter = FALSE,
1215+
maf.thresholds = maf.thresholds,
1216+
parallel.core = parallel.core,
1217+
verbose = FALSE)$tidy.filtered.maf
11761218
} # End of MAF filters
11771219

11781220

0 commit comments

Comments
 (0)