**v.0.3.7**

* `assigner::fst_WC84` is 30% faster!
thierrygosselin · Nov 23, 2016 · 27eae1d · 27eae1d
1 parent 0049e03
commit 27eae1d
Show file tree

Hide file tree

Showing 8 changed files with 175 additions and 236 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: assigner
 Type: Package
 Title: Assignment Analysis with GBS/RADseq Data using R
-Version: 0.3.6
-Date: 2016-11-14
+Version: 0.3.7
+Date: 2016-11-22
 Encoding: UTF-8
 Authors@R: c(
   person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,31 @@
+# assigner v.0.3.7
+* `assigner::fst_WC84` is 30% faster!
+
+
+# assigner v.0.3.6
+* bug fix `assignment_ngs` during imputations, the imputation module could not
+recognise that REF/ALT alleles are not necessary or useful for assignment analysis.
+* enhancement to `assignment_ngs` and `assignment_mixture` so that when
+`marker.number` includes `"all"` the `iteration.method` is set automatically to `1`
+when conducting the assignment with all the markers.
+Iterating at this point is useless and a waste of time.
+* random seed number is now stored in the appropriate files.
+* `assignment_mixture`: with `assignment.analysis = "gsi_sim"` the unknown/mixture samples
+are compared with baseline populations using common markers between the pair.
+Now, the tables include the number of markers used. The summary provides the mean
+number of markers. This number will change each time randomness is used.
+
+# assigner v.0.3.5
+* bug fix: populations were not recognised properly
+
+# assigner v.0.3.4
+* `fst_NEI87`: very fast function that can compute: the overall and pairwise Nei's (1987) fst and f'st (prime). 
+Bootstrap resampling of markers is available to build Confidence Intervals. The estimates are available as a data frame and a matrix with upper diagonal filled with Fst values and lower diagonal filled with the confidence intervals. Jost's D is also given ;)
+
+# assigner v.0.3.3
+* `fst_WC84`: bug fix, the function was not properly configured for multi-allelic markers (e.g. microsatellite, and haplotype format from STACKS). Thanks to Craig McDougall for catching this.
+
+
 # assigner v.0.3.2
 * `assignment_mixture`: added a check to throw an error when pop.levels != the pop.id in strata
 

diff --git a/R/fst_NEI87.R b/R/fst_NEI87.R
@@ -428,39 +428,44 @@ fst_NEI87 <- function(
       tibble::as_data_frame()
 
 
-    # frequency per markes, alleles, pop
+    # frequency per markers, alleles, pop
     p <- x %>%
       dplyr::group_by(MARKERS, POP_ID) %>%
       dplyr::count(GT) %>% 
       dplyr::mutate(P = n / sum(n)) %>% 
       dplyr::select(-n) %>% 
-      dplyr::arrange(MARKERS, POP_ID, GT) #%>% complete(data = ., POP_ID, nesting(MARKERS, GT), fill = list(P = 0)) %>%
+      dplyr::arrange(MARKERS, POP_ID, GT) %>%  #%>% complete(data = ., POP_ID, nesting(MARKERS, GT), fill = list(P = 0)) %>%
+      dplyr::ungroup(.)
 
     # mp: mean frequency per markers
+    # mp2: sum of square mean frequency per markers
     mean.p2 <- p %>% 
       tidyr::complete(data = ., POP_ID, tidyr::nesting(MARKERS, GT), fill = list(P = 0)) %>%
       dplyr::group_by(MARKERS, GT) %>% 
       dplyr::summarise(MP = mean(P, na.rm = TRUE)) %>% 
       dplyr::group_by(MARKERS) %>% 
-      dplyr::summarise(MP2 = sum(MP^2))
+      dplyr::summarise(MP2 = sum(MP^2)) %>% 
+      dplyr::ungroup(.)
 
-    # msp2 mean frequency per markers
+    # msp2 mean frequency per markers per pop
     mean.frequency.markers <- p %>%
       dplyr::group_by(MARKERS, POP_ID) %>% 
       dplyr::summarise(SP2 = sum(P^2)) %>% 
       dplyr::group_by(MARKERS) %>% 
-      dplyr::summarise(MSP2 = mean(SP2, na.rm = TRUE))
+      dplyr::summarise(MSP2 = mean(SP2, na.rm = TRUE)) %>% 
+      dplyr::ungroup(.)
 
     # For diploid-------------------------------------------------------------------
     # Mean heterozygosity observed per pop and markers
     # mean heterozygosity across all markers
     mean.het.obs.markers <- x %>%
       dplyr::group_by(POP_ID, MARKERS, INDIVIDUALS) %>% 
-      dplyr::mutate(HO = if_else(GT[ALLELES == "A1"] != GT[ALLELES == "A2"], 1, 0)) %>% 
+      dplyr::mutate(HO = dplyr::if_else(GT[ALLELES == "A1"] != GT[ALLELES == "A2"], 1, 0)) %>% 
       dplyr::group_by(POP_ID, MARKERS) %>% 
       dplyr::summarise(HO = mean(HO)) %>% 
       dplyr::group_by(MARKERS) %>% 
-      dplyr::summarise(HO = mean(HO))
+      dplyr::summarise(HO = mean(HO)) %>% 
+      dplyr::ungroup(.)
 
     # mn: corrected mean number of individuals per markers
     #n: number of individuals, per pop and markers
@@ -481,8 +486,8 @@ fst_NEI87 <- function(
       dplyr::full_join(mean.frequency.markers, by = "MARKERS") %>%
       dplyr::full_join(mean.p2, by = "MARKERS") %>% 
       dplyr::mutate(
-        HS = MN / (MN - 1) * (1 - MSP2 - HO / 2 / MN),
-        HT = 1 - MP2 + HS / MN / NP - HO / 2 / MN / NP,
+        HS = MN / (MN - 1) * (1 - MSP2 - HO / 2 / MN),#Expected Heterozygosity within populations
+        HT = 1 - MP2 + HS / MN / NP - HO / 2 / MN / NP,# Total Gene diversity
         FIS = 1 - HO / HS,
         DST = HT - HS,
         DST_P = NP / (NP - 1) * DST,