From 685977d1f329b7a786c898fdb5807bdcc9222464 Mon Sep 17 00:00:00 2001 From: Thierry Gosselin Date: Wed, 27 Apr 2016 00:58:24 +1200 Subject: [PATCH] v.0.2.4 introducing a new function `import_subsamples_fst` --- DESCRIPTION | 4 +-- NAMESPACE | 1 + R/import_subsamples.R | 20 +++++++++++--- R/import_subsamples_fst.R | 51 ++++++++++++++++++++++++++++++++++++ README.md | 6 +++++ man/import_subsamples_fst.Rd | 30 +++++++++++++++++++++ 6 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 R/import_subsamples_fst.R create mode 100644 man/import_subsamples_fst.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 61944ad..cec81ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: assigner Type: Package Title: Assignment Analysis with GBS/RADseq Data using R -Version: 0.2.3 -Date: 2016-04-23 +Version: 0.2.4 +Date: 2016-04-27 Encoding: UTF-8 Authors@R: c( person("Thierry", "Gosselin", email = "thierrygosselin@icloud.com", role = c("aut", "cre"))) diff --git a/NAMESPACE b/NAMESPACE index 873d1bf..eae09d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(assignment_ngs) export(dlr) export(gsi_sim_binary) export(import_subsamples) +export(import_subsamples_fst) export(install_gsi_sim) export(plot_assignment) export(plot_assignment_dlr) diff --git a/R/import_subsamples.R b/R/import_subsamples.R index 15d2d57..a89ea28 100644 --- a/R/import_subsamples.R +++ b/R/import_subsamples.R @@ -29,16 +29,28 @@ import_subsamples <- function(dir.path, imputations){ if (missing (dir.path)) stop("dir.path argument missing") if (missing (imputations)) imputations <- FALSE + sampling.method <- stri_detect_fixed(str = dir.path, pattern = "ranked") # looks for ranked + subsample.folders <- list.files(path = dir.path, pattern = "subsample_", full.names = FALSE) data <- list() for (i in subsample.folders) { sub.name <- stri_replace_all_fixed(str = i, pattern = "_", replacement = ".", vectorize_all = FALSE) - if (imputations == TRUE){ - filename <- stri_paste(i, 
"/","assignment.random.imputed.results.individuals.iterations.", sub.name, ".tsv") + if (sampling.method == FALSE){ + if (imputations == TRUE){ + filename <- stri_paste(i, "/","assignment.random.imputed.results.individuals.iterations.", sub.name, ".tsv") + } else { + filename <- stri_paste(i, "/","assignment.random.no.imputation.results.individuals.iterations.", sub.name, ".tsv") + } } else { - filename <- stri_paste(i, "/","assignment.random.no.imputation.results.individuals.iterations.", sub.name, ".tsv") + if (imputations == TRUE){ + filename <- stri_paste(i, "/","assignment.ranked.imputed.results.individuals.iterations.", sub.name, ".tsv") + } else { + filename <- stri_paste(i, "/","assignment.ranked.no.imputation.results.individuals.iterations.", sub.name, ".tsv") + } } - subsample.data <- read_tsv(file = filename, col_names = TRUE) #%>% filter (MISSING_DATA == 'no.imputation') + subsample.data <- read_tsv(file = filename, col_names = TRUE) + # mutate(SUBSAMPLE = rep(i, n())) + # filter (MISSING_DATA == 'no.imputation') data[[i]] <- subsample.data } data <- as_data_frame(bind_rows(data)) diff --git a/R/import_subsamples_fst.R b/R/import_subsamples_fst.R new file mode 100644 index 0000000..546cb02 --- /dev/null +++ b/R/import_subsamples_fst.R @@ -0,0 +1,51 @@ +# Import the Fst ranking files of every subsample run into a single data frame + +#' @name import_subsamples_fst +#' @title Import the fst ranking from all the subsample runs inside +#' an assignment folder. +#' @description This function will import all the fst ranking from all the +#' subsample runs inside an assignment folder. +#' @param dir.path The path to the directory containing the subsample folders. +#' @return A data frame of all the Fst and ranking. 
+ +#' @export +#' @rdname import_subsamples_fst +#' @import dplyr +#' @import stringi + + +#' @examples +#' \dontrun{ +#' subsamples.data <- import_subsamples_fst( +#' dir.path = "assignment_analysis_method_ranked_no_imputations_20160425@2321" +#' ) +#' } + +#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com} + +import_subsamples_fst <- function(dir.path){ + if (missing (dir.path)) stop("dir.path argument missing") + + sampling.method <- stri_detect_fixed(str = dir.path, pattern = "ranked") # TRUE when dir.path contains the string "ranked" + + if (sampling.method == FALSE) stop("This function doesn't work for markers sampled randomly") # abort unless the path mentions "ranked" + subsample.folders <- list.files(path = dir.path, pattern = "subsample_", full.names = FALSE) # names only (no full paths) of entries matching "subsample_" + + data.subsample <- list() + for (i in subsample.folders) { + fst.files.list <- list.files(path = stri_paste(dir.path, "/", i), pattern = "fst.ranked", full.names = FALSE) + data.fst <- list() + for (j in fst.files.list) { + fst.file <- read_tsv(file = stri_paste(dir.path, "/", i, "/", j), col_names = TRUE) %>% + mutate( + SUBSAMPLE = rep(i, n()), # name of the subsample folder the file came from + ITERATIONS = rep(j, n()) # name of the fst file that was read + ) + data.fst[[j]] <- fst.file + } + data.fst <- as_data_frame(bind_rows(data.fst)) # stack every file read for this subsample folder + data.subsample[[i]] <- data.fst + } + data <- as_data_frame(bind_rows(data.subsample)) # stack the per-folder tables into one + return(data) +} diff --git a/README.md b/README.md index ea62589..2d4a239 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ by Eric C. Anderson (see Anderson et al. 2008 and Anderson 2010) or [adegenet] ( 2. an haplotypes data frame file (*batch_x.haplotypes.tsv*) produced by [STACKS] (http://catchenlab.life.illinois.edu/stacks/) (Catchen et al. 2011, 2013), 3. very large files (> 50 000 markers) can be imported in PLINK tped/tfam format (Purcell et al. 2007), and 4. a data frame of genotypes. + 5. For genepop file, use [genepopedit](https://github.com/rystanley/genepopedit) to prepare the file for assigner. 
* Individuals, populations and markers can be **filtered** and/or selected in several ways using **blacklist, whitelist** and other arguments * **Map-independent imputation** of missing genotype or alleles using **Random Forest** or the most frequent category is also available to test the impact of missing data on assignment analysis @@ -140,6 +141,11 @@ The Amazon image can be imported into Google Cloud Compute Engine to start a new ## New +**v.0.2.4** +* bug fixed using adegenet that was introduced in v.0.2.3 +* introducing a new function `import_subsamples_fst` to import the fst ranking +results from all the subsample runs inside an assignment folder. + **v.0.2.3** * bug fixed in the compilation results section when no pop.id.start and end are used. diff --git a/man/import_subsamples_fst.Rd b/man/import_subsamples_fst.Rd new file mode 100644 index 0000000..36e3fc9 --- /dev/null +++ b/man/import_subsamples_fst.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import_subsamples_fst.R +\name{import_subsamples_fst} +\alias{import_subsamples_fst} +\title{Import the fst ranking from all the subsample runs inside +an assignment folder.} +\usage{ +import_subsamples_fst(dir.path) +} +\arguments{ +\item{dir.path}{The path to the directory containing the subsample folders.} +} +\value{ +A data frame of all the Fst and ranking. +} +\description{ +This function will import all the fst ranking from all the +subsample runs inside an assignment folder. +} +\examples{ +\dontrun{ +subsamples.data <- import_subsamples_fst( +dir.path = "assignment_analysis_method_ranked_no_imputations_20160425@2321" +) +} +} +\author{ +Thierry Gosselin \email{thierrygosselin@icloud.com} +} +