Skip to content

Commit

Permalink
# radiator v.0.0.16 2018-09-04
Browse files Browse the repository at this point in the history
* `tidy_vcf`, `tidy_genomic_data` and `genomic_converter`: way faster with huge VCF
* `write_fineradstructure`: fix bug when data was from DArT
  • Loading branch information
thierrygosselin committed Sep 4, 2018
1 parent 148bc67 commit 1ade09d
Show file tree
Hide file tree
Showing 19 changed files with 1,007 additions and 679 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: radiator
Type: Package
Title: RADseq Data Exploration, Manipulation and Visualization using R
Version: 0.0.15
Date: 2018-08-17
Version: 0.0.16
Date: 2018-09-04
Encoding: UTF-8
Authors@R: c(
person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),
Expand All @@ -12,7 +12,7 @@ Authors@R: c(
Maintainer: Thierry Gosselin <[email protected]>
Description: radiator: an R package for RADseq Data Exploration, Manipulation and Visualization.
Depends:
R (>= 3.4.0)
R (>= 3.5.0)
Imports:
amap,
broom,
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export(keep_common_markers)
export(mclapply_win)
export(merge_dart)
export(merge_vcf)
export(parse_genomic)
export(parse_gds_metadata)
export(pi)
export(plot_bayescan)
export(plot_boxplot_coverage)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# radiator v.0.0.16 2018-09-04

* `tidy_vcf`, `tidy_genomic_data` and `genomic_converter`: much faster with huge VCF files
* `write_fineradstructure`: fix bug when data was from DArT


# radiator v.0.0.15 2018-08-17

* `genomic_converter`, `tidy_genomic_data`: bug fix when individuals are integers
Expand Down
40 changes: 38 additions & 2 deletions R/change_alleles.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#' during execution.
#' Default: \code{verbose = FALSE}.

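#' @param ... (optional) Further arguments for fine-tuning the tidying
#' (details below). Recognized names are \code{gt.vcf.nuc}, \code{gt.vcf},
#' \code{gt} and \code{gt.bin}; each defaults to \code{TRUE} when not supplied,
#' and at least one of \code{gt.vcf.nuc} or \code{gt} must be \code{TRUE}.
#' Any other argument name stops the function with an error.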


#' @return
#' Depending if the input file is biallelic or multiallelic,
#' the function will output additional to REF and ALT column several genotype codings:
Expand Down Expand Up @@ -65,12 +69,19 @@ change_alleles <- function(
data,
biallelic = NULL,
parallel.core = parallel::detectCores() - 1,
verbose = FALSE) {
verbose = FALSE,
...
) {

# test
# biallelic = NULL
# parallel.core = parallel::detectCores() - 1
# verbose = TRUE
# gt.vcf.nuc <- TRUE
# gt.vcf <- TRUE
# gt <- TRUE
# gt.bin <- TRUE


# Checking for missing and/or default arguments ------------------------------
if (missing(data)) stop("Input file missing")
Expand All @@ -80,6 +91,31 @@ change_alleles <- function(
data <- dplyr::rename(.data = data, MARKERS = LOCUS)
}

# dotslist -------------------------------------------------------------------
# Capture the optional arguments passed through `...` and validate their names
# against the short list of switches this function understands.
dotslist <- list(...)
want <- c("gt.vcf.nuc", "gt.vcf", "gt", "gt.bin")
unknowned_param <- setdiff(names(dotslist), want)

# Any name outside `want` is a hard error; all offending names are reported,
# separated by a space.
if (length(unknowned_param) > 0) {
stop("Unknowned \"...\" parameters ",
stringi::stri_join(unknowned_param, collapse = " "))
}

# Keep only the recognized entries; `[[` returns NULL for a switch that was
# not supplied.
radiator.dots <- dotslist[names(dotslist) %in% want]
gt.vcf.nuc <- radiator.dots[["gt.vcf.nuc"]]
gt.vcf <- radiator.dots[["gt.vcf"]]
gt <- radiator.dots[["gt"]]
gt.bin <- radiator.dots[["gt.bin"]]

# Every switch that was not supplied defaults to TRUE.
if (is.null(gt.vcf.nuc)) gt.vcf.nuc <- TRUE
if (is.null(gt.vcf)) gt.vcf <- TRUE
if (is.null(gt)) gt <- TRUE
if (is.null(gt.bin)) gt.bin <- TRUE

# Refuse the combination where both gt.vcf.nuc and gt are switched off.
# NOTE(review): the values are used directly in `&&`, so they are presumably
# expected to be single TRUE/FALSE values; nothing here checks that - confirm
# against callers.
if (!gt.vcf.nuc && !gt) {
stop("At least one of gt.vcf.nuc or gt must be TRUE")
}

# get number of markers
n.catalog.locus <- dplyr::n_distinct(data$MARKERS)

Expand Down Expand Up @@ -160,7 +196,7 @@ change_alleles <- function(
inversion <- FALSE
}
old.ref <- NULL
message(" number of markers with REF/ALT change(s) = ", nrow(change.ref))
message("\nNumber of markers with REF/ALT change(s) = ", nrow(change.ref))
} else {
inversion <- FALSE
}
Expand Down
10 changes: 9 additions & 1 deletion R/filter_rad.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,15 @@ filter_rad <- function(
pop.select = pop.select,
blacklist.id = blacklist.id,
parallel.core = parallel.core,
verbose = FALSE)
verbose = FALSE,
vcf.stats = TRUE,
snp.read.position.filter = NULL,
mac.threshold = NULL,
gt.vcf.nuc = TRUE,
gt.vcf = TRUE,
gt = TRUE,
gt.bin = TRUE,
keep.gds = FALSE)


# Keep GT_BIN
Expand Down
13 changes: 11 additions & 2 deletions R/genomic_converter.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ genomic_converter <- function(
verbose = TRUE,
...
) {

if (verbose) {
cat("#######################################################################\n")
cat("##################### radiator::genomic_converter #####################\n")
Expand Down Expand Up @@ -427,7 +428,15 @@ devtools::install_github('ericarcher/strataG', build_vignettes = TRUE)")
pop.select = pop.select,
filename = NULL,
verbose = FALSE,
keep.allele.names = keep.allele.names
keep.allele.names = keep.allele.names,
vcf.stats = TRUE,
snp.read.position.filter = NULL,
mac.threshold = NULL,
gt.vcf.nuc = TRUE,
gt.vcf = TRUE,
gt = TRUE,
gt.bin = TRUE,
keep.gds = FALSE
)

if(verbose) message("\nPreparing data for output\n")
Expand Down Expand Up @@ -473,7 +482,7 @@ devtools::install_github('ericarcher/strataG', build_vignettes = TRUE)")
vectorize_all = FALSE
)
} else {
message("IMPORTANT: you have > 20 000 markers (", marker.number, ")",
message("\nIMPORTANT: you have > 20 000 markers (", marker.number, ")",
"\nDo you want the more suitable genlight object instead of the current genind? (y/n):")
overide.genind <- as.character(readLines(n = 1))
if (overide.genind == "y") {
Expand Down
3 changes: 2 additions & 1 deletion R/global_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ if (getRversion() >= "2.15.1") {
"VALUES", "TOTAL_READ_COUNTS", "aswer.opt", "markers.meta", "vcf.connection",
"ALT_COUNT", "INDIVIDUALS_VCF", "MAC", "MAC_FILTER", "REF_COUNT",
"SNP_PER_LOCUS_MAC", "SNP_POS_READ_IQR", "SNP_POS_READ_OUTLIERS",
"SNP_POS_READ_Q75", "VARIANT_ID"
"SNP_POS_READ_Q75", "VARIANT_ID", "genotypes", "NEW_POP", "NEW_INDIVIDUALS",
"biallelic"
)
)
}
12 changes: 11 additions & 1 deletion R/summary_strata.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,22 @@ read_strata <- function(strata, pop.id = FALSE,
trim_ws = TRUE))
}
blacklist.id$INDIVIDUALS <- clean_ind_names(blacklist.id$INDIVIDUALS)


# remove potential duplicate id
# `dup` holds the distinct INDIVIDUALS of the blacklist; the row difference is
# the number of duplicated entries. A single duplicated row already requires
# de-duplication, so the test is `> 0` (the previous `> 1` let exactly one
# duplicate slip through and inflate the blacklist count below).
dup <- dplyr::distinct(.data = blacklist.id, INDIVIDUALS)
blacklist.id.dup <- nrow(blacklist.id) - nrow(dup)
if (blacklist.id.dup > 0) {
  message("Duplicate id's in blacklist: ", blacklist.id.dup)
  blacklist.id <- dup
}
# Release the temporaries.
dup <- blacklist.id.dup <- NULL
n.ind.blacklist <- length(blacklist.id$INDIVIDUALS)
if (verbose) message("\nNumber of individuals in blacklist: ", n.ind.blacklist, " ind.")
# Number of strata individuals actually matched by the blacklist. `%in%`
# returns one logical per strata row, so the matches must be counted with
# sum(); length() would always report the total number of strata rows.
n.ind.blacklisted <- sum(strata$INDIVIDUALS %in% blacklist.id$INDIVIDUALS)
strata <- dplyr::filter(strata, !INDIVIDUALS %in% blacklist.id$INDIVIDUALS)
if (verbose) message("\nBlacklisted individuals: ", n.ind.blacklisted, " ind.")
}
}


# manage levels, labels and pop.select ---------------------------------------
Expand Down
60 changes: 51 additions & 9 deletions R/tidy_genomic_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,9 @@ tidy_genomic_data <- function(

# dotslist -------------------------------------------------------------------
dotslist <- list(...)
want <- c("keep.allele.names")
want <- c("keep.allele.names", "snp.read.position.filter", "mac.threshold",
"ref.calibration", "gt.vcf.nuc", "gt.vcf", "gt", "gt.bin", "vcf.stats",
"keep.gds")
unknowned_param <- setdiff(names(dotslist), want)

if (length(unknowned_param) > 0) {
Expand All @@ -419,7 +421,36 @@ tidy_genomic_data <- function(
# Pull each recognized optional argument out of `radiator.dots`; `[[` returns
# NULL for anything that was not supplied.
keep.allele.names <- radiator.dots[["keep.allele.names"]]

if (is.null(keep.allele.names)) keep.allele.names <- FALSE
snp.read.position.filter <- radiator.dots[["snp.read.position.filter"]]
mac.threshold <- radiator.dots[["mac.threshold"]]
ref.calibration <- radiator.dots[["ref.calibration"]]
gt.vcf.nuc <- radiator.dots[["gt.vcf.nuc"]]
gt.vcf <- radiator.dots[["gt.vcf"]]
gt <- radiator.dots[["gt"]]
gt.bin <- radiator.dots[["gt.bin"]]
vcf.stats <- radiator.dots[["vcf.stats"]]
# NOTE(review): "filename" is not listed in the `want` vector used to validate
# `...`, so `radiator.dots` can never contain it and this assignment always
# yields NULL. If `filename` is also a formal argument of this function (the
# signature is not visible here), the caller's value is discarded at this
# point - confirm, then either drop this line or handle "filename" explicitly.
filename <- radiator.dots[["filename"]]
keep.gds <- radiator.dots[["keep.gds"]]

# Defaults for the switches that were not supplied. `snp.read.position.filter`
# and `mac.threshold` get no default here and stay NULL when absent.
if (is.null(keep.gds)) keep.gds <- TRUE
if (is.null(vcf.stats)) vcf.stats <- TRUE
if (is.null(ref.calibration)) ref.calibration <- FALSE
if (is.null(gt.vcf.nuc)) gt.vcf.nuc <- TRUE
if (is.null(gt.vcf)) gt.vcf <- TRUE
if (is.null(gt)) gt <- TRUE
if (is.null(gt.bin)) gt.bin <- TRUE


# Refuse the combination where both gt.vcf.nuc and gt are switched off.
if (!gt.vcf.nuc && !gt) {
stop("At least one of gt.vcf.nuc or gt must be TRUE")
}

# When supplied, the filter must be drawn from "outliers", "iqr" and "q75";
# several.ok = TRUE allows more than one of them to be given at once.
if (!is.null(snp.read.position.filter)) {
snp.read.position.filter <- match.arg(
arg = snp.read.position.filter,
choices = c("outliers", "iqr", "q75"),
several.ok = TRUE)
}

# File type detection----------------------------------------------------------
skip.tidy.wide <- FALSE # initiate for data frame below
Expand Down Expand Up @@ -526,8 +557,19 @@ tidy_genomic_data <- function(
blacklist.id = blacklist.id,
pop.select = pop.select,
pop.levels = pop.levels,
pop.labels = pop.labels
)
pop.labels = pop.labels,
filename = NULL,
vcf.stats = TRUE,
snp.read.position.filter = NULL,
mac.threshold = NULL,
gt.vcf.nuc = TRUE,
gt.vcf = TRUE,
gt = TRUE,
gt.bin = TRUE,
keep.gds = FALSE
) %$%
genotypes

biallelic <- radiator::detect_biallelic_markers(input)
} # End import VCF

Expand Down Expand Up @@ -1167,12 +1209,12 @@ tidy_genomic_data <- function(

# Minor Allele Frequency filter ----------------------------------------------
if (!is.null(maf.thresholds)) { # with MAF
input <- radiator::filter_maf(
data = input,
interactive.filter = FALSE,
maf.thresholds = maf.thresholds,
parallel.core = parallel.core,
verbose = FALSE)$tidy.filtered.maf
input <- radiator::filter_maf(
data = input,
interactive.filter = FALSE,
maf.thresholds = maf.thresholds,
parallel.core = parallel.core,
verbose = FALSE)$tidy.filtered.maf
} # End of MAF filters


Expand Down
Loading

0 comments on commit 1ade09d

Please sign in to comment.