From 685977d1f329b7a786c898fdb5807bdcc9222464 Mon Sep 17 00:00:00 2001
From: Thierry Gosselin <thierrygosselin@icloud.com>
Date: Wed, 27 Apr 2016 00:58:24 +1200
Subject: [PATCH] v.0.2.4 introducing a new function `import_subsamples_fst`

---
 DESCRIPTION                  |  4 +--
 NAMESPACE                    |  1 +
 R/import_subsamples.R        | 20 +++++++++++---
 R/import_subsamples_fst.R    | 51 ++++++++++++++++++++++++++++++++++++
 README.md                    |  6 +++++
 man/import_subsamples_fst.Rd | 30 +++++++++++++++++++++
 6 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 R/import_subsamples_fst.R
 create mode 100644 man/import_subsamples_fst.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 61944ad..cec81ff 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: assigner
 Type: Package
 Title: Assignment Analysis with GBS/RADseq Data using R
-Version: 0.2.3
-Date: 2016-04-23
+Version: 0.2.4
+Date: 2016-04-27
 Encoding: UTF-8
 Authors@R: c(
   person("Thierry", "Gosselin", email = "thierrygosselin@icloud.com", role = c("aut", "cre")))
diff --git a/NAMESPACE b/NAMESPACE
index 873d1bf..eae09d2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ export(assignment_ngs)
 export(dlr)
 export(gsi_sim_binary)
 export(import_subsamples)
+export(import_subsamples_fst)
 export(install_gsi_sim)
 export(plot_assignment)
 export(plot_assignment_dlr)
diff --git a/R/import_subsamples.R b/R/import_subsamples.R
index 15d2d57..a89ea28 100644
--- a/R/import_subsamples.R
+++ b/R/import_subsamples.R
@@ -29,16 +29,28 @@ import_subsamples <- function(dir.path, imputations){
   if (missing (dir.path)) stop("dir.path argument missing")
   if (missing (imputations)) imputations <- FALSE
   
+  sampling.method <- stri_detect_fixed(str = dir.path, pattern = "ranked") # looks for ranked
+  
   subsample.folders <- list.files(path = dir.path, pattern = "subsample_", full.names = FALSE)
   data <- list()
   for (i in subsample.folders) {
     sub.name <- stri_replace_all_fixed(str = i, pattern = "_", replacement = ".", vectorize_all = FALSE)
-    if (imputations == TRUE){
-      filename <- stri_paste(i, "/","assignment.random.imputed.results.individuals.iterations.", sub.name, ".tsv")
+    if (sampling.method == FALSE){
+      if (imputations == TRUE){
+        filename <- stri_paste(i, "/","assignment.random.imputed.results.individuals.iterations.", sub.name, ".tsv")
+      } else {
+        filename <- stri_paste(i, "/","assignment.random.no.imputation.results.individuals.iterations.", sub.name, ".tsv")
+      }
     } else {
-      filename <- stri_paste(i, "/","assignment.random.no.imputation.results.individuals.iterations.", sub.name, ".tsv")
+      if (imputations == TRUE){
+        filename <- stri_paste(i, "/","assignment.ranked.imputed.results.individuals.iterations.", sub.name, ".tsv")
+      } else {
+        filename <- stri_paste(i, "/","assignment.ranked.no.imputation.results.individuals.iterations.", sub.name, ".tsv")
+      }
     }
-    subsample.data <- read_tsv(file = filename, col_names = TRUE) #%>% filter (MISSING_DATA == 'no.imputation')
+    subsample.data <- read_tsv(file = filename, col_names = TRUE) 
+    # mutate(SUBSAMPLE = rep(i, n()))
+      # filter (MISSING_DATA == 'no.imputation')
     data[[i]] <- subsample.data
   }
   data <- as_data_frame(bind_rows(data))
diff --git a/R/import_subsamples_fst.R b/R/import_subsamples_fst.R
new file mode 100644
index 0000000..546cb02
--- /dev/null
+++ b/R/import_subsamples_fst.R
@@ -0,0 +1,51 @@
+# Import the Fst ranking results of all the subsample runs into one data frame
+
+#' @name import_subsamples_fst
+#' @title Import the fst ranking from all the subsample runs inside 
+#' an assignment folder.
+#' @description This function will import all the fst ranking from all the 
+#' subsample runs inside an assignment folder.
+#' @param dir.path The path to the directory containing the subsample folders.
+#' @return A data frame of all the Fst and ranking.
+
+#' @export
+#' @rdname import_subsamples_fst
+#' @import dplyr
+#' @import stringi
+
+
+#' @examples
+#' \dontrun{
+#' subsamples.data <- import_subsamples_fst(
+#' dir.path = "assignment_analysis_method_ranked_no_imputations_20160425@2321"
+#' )
+#' }
+
+#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com}
+
+import_subsamples_fst <- function(dir.path){
+  # Stack every Fst ranking file found in the subsample folders of `dir.path`,
+  # tagging rows with SUBSAMPLE (folder name) and ITERATIONS (file name).
+  if (missing(dir.path)) stop("dir.path argument missing")
+  # A ranked run is recognised by "ranked" appearing in the folder path.
+  if (!stri_detect_fixed(str = dir.path, pattern = "ranked")) {
+    stop("This function doesn't work for markers sampled randomly")
+  }
+  subsample.folders <- list.files(path = dir.path, pattern = "subsample_", full.names = FALSE)
+  if (length(subsample.folders) == 0) warning("no subsample_ folder found in dir.path", call. = FALSE)
+  data.subsample <- list()
+  for (i in subsample.folders) {
+    # list.files() treats `pattern` as a regex: escape the dot to match it literally.
+    fst.files.list <- list.files(path = file.path(dir.path, i), pattern = "fst\\.ranked", full.names = FALSE)
+    data.fst <- list()
+    for (j in fst.files.list) {
+      data.fst[[j]] <- read_tsv(file = file.path(dir.path, i, j), col_names = TRUE) %>%
+        mutate(SUBSAMPLE = rep(i, n()), ITERATIONS = rep(j, n()))
+    }
+    # One combined data frame per subsample folder.
+    data.subsample[[i]] <- bind_rows(data.fst)
+  }
+  # Combine the per-folder data frames into the returned data frame.
+  data <- as_data_frame(bind_rows(data.subsample))
+  return(data)
+}
diff --git a/README.md b/README.md
index ea62589..2d4a239 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ by Eric C. Anderson (see Anderson et al. 2008 and Anderson 2010) or [adegenet] (
     2. an haplotypes data frame file (*batch_x.haplotypes.tsv*) produced by [STACKS] (http://catchenlab.life.illinois.edu/stacks/) (Catchen et al. 2011, 2013),
     3. very large files (> 50 000 markers) can be imported in PLINK tped/tfam format (Purcell et al. 2007), and
     4. a data frame of genotypes.
+    5. For genepop files, use [genepopedit](https://github.com/rystanley/genepopedit) to prepare the file for assigner.
 * Individuals, populations and markers can be **filtered** and/or selected in several ways using **blacklist, 
 whitelist** and other arguments
 * **Map-independent imputation** of missing genotype or alleles using **Random Forest** or the most frequent category is also available to test the impact of missing data on assignment analysis
@@ -140,6 +141,11 @@ The Amazon image can be imported into Google Cloud Compute Engine to start a new
 
 ## New
 
+**v.0.2.4**
+* fixed a bug, introduced in v.0.2.3, that occurred when using adegenet
+* introducing a new function `import_subsamples_fst` to import the fst ranking 
+results from all the subsample runs inside an assignment folder.
+
 **v.0.2.3**
 * bug fixed in the compilation results section when no pop.id.start and end are used.
 
diff --git a/man/import_subsamples_fst.Rd b/man/import_subsamples_fst.Rd
new file mode 100644
index 0000000..36e3fc9
--- /dev/null
+++ b/man/import_subsamples_fst.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/import_subsamples_fst.R
+\name{import_subsamples_fst}
+\alias{import_subsamples_fst}
+\title{Import the fst ranking from all the subsample runs inside 
+an assignment folder.}
+\usage{
+import_subsamples_fst(dir.path)
+}
+\arguments{
+\item{dir.path}{The path to the directory containing the subsample folders.}
+}
+\value{
+A data frame of all the Fst and ranking.
+}
+\description{
+This function will import all the fst ranking from all the 
+subsample runs inside an assignment folder.
+}
+\examples{
+\dontrun{
+subsamples.data <- import_subsamples_fst(
+dir.path = "assignment_analysis_method_ranked_no_imputations_20160425@2321"
+)
+}
+}
+\author{
+Thierry Gosselin \email{thierrygosselin@icloud.com}
+}
+