# radiator 1.1.7 2020-08-21

* PLINK files: fix a couple of bugs reading the tped * removed `tidyr::gather` and `tidyr::spread` dependencies (they are deprecated) * DArT data in 1-row format was not working properly with latest `data.table` melt function. Changed to `tidyr::pivot_long`.
thierrygosselin · Aug 21, 2020 · 232317c · 232317c
1 parent 6c08e36
commit 232317c
Show file tree

Hide file tree

Showing 296 changed files with 811 additions and 2,445 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: radiator
 Type: Package
 Title: RADseq Data Exploration, Manipulation and Visualization using R
-Version: 1.1.6
-Date: 2020-06-23
+Version: 1.1.7
+Date: 2020-08-21
 Encoding: UTF-8
 Authors@R: c(
   person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),
@@ -46,7 +46,7 @@ Suggests:
     stringdist,
     stringi,
     tibble,
-    tidyr (>= 1.0.0),
+    tidyr (>= 1.1.0),
     utils
 License: GPL-3
 LazyLoad: yes

diff --git a/NAMESPACE b/NAMESPACE
@@ -46,7 +46,6 @@ export(detect_mixed_genomes)
 export(detect_paralogs)
 export(detect_ref_genome)
 export(diagnostic_maf)
-export(discard_monomorphic_markers)
 export(distance2tibble)
 export(distance_individuals)
 export(erase_genotypes)
@@ -63,19 +62,16 @@ export(extract_markers_metadata)
 export(filter_blacklist_genotypes)
 export(filter_common_markers)
 export(filter_coverage)
-export(filter_dart)
 export(filter_dart_reproducibility)
 export(filter_fis)
 export(filter_genotyping)
 export(filter_het)
 export(filter_hwe)
-export(filter_individual)
 export(filter_individuals)
 export(filter_ld)
 export(filter_mac)
 export(filter_maf)
 export(filter_monomorphic)
-export(filter_population)
 export(filter_rad)
 export(filter_snp_number)
 export(filter_snp_position_read)
@@ -105,7 +101,6 @@ export(ind_total_reads)
 export(individuals2strata)
 export(integrate_ref)
 export(join_strata)
-export(keep_common_markers)
 export(ld2df)
 export(ld_boxplot)
 export(ld_missing)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# radiator 1.1.7 2020-08-21
+
+* PLINK files: fix a couple of bugs reading the tped
+* removed `tidyr::gather` and `tidyr::spread` dependencies (they are deprecated)
+* DArT data in 1-row format was not working properly with latest `data.table` melt function. 
+Changed to `tidyr::pivot_long`.
+
+
 # radiator 1.1.6 2020-06-23
 
 * updated radiator so that it work with latest release of SeqArray (v.1.28.1), that

diff --git a/R/allele_frequencies.R b/R/allele_frequencies.R
@@ -92,7 +92,8 @@ allele_frequencies <- function(data, verbose = TRUE) {
   maf <- maf.data
   maf.data <- NULL
   maf.local.wide <- dplyr::select(.data = maf, MARKERS, POP_ID, MAF_LOCAL) %>%
-    tidyr::spread(data = ., key = MARKERS, value = MAF_LOCAL)
+    tidyr::pivot_wider(data = ., names_from = "MARKERS", values_from = "MAF_LOCAL")
+
 
   maf.global <- dplyr::distinct(.data = maf, MARKERS, MAF_GLOBAL)
 
@@ -104,16 +105,21 @@ allele_frequencies <- function(data, verbose = TRUE) {
 
     freq.wide <- dplyr::ungroup(freq) %>%
       dplyr::select(MARKERS, POP_ID, REF = FREQ_REF, ALT = MAF_LOCAL) %>%
-      tidyr::gather(data = ., key = ALLELES, value = FREQ, -c(MARKERS, POP_ID)) %>%
+      tidyr::pivot_longer(
+        data = .,
+        cols = -c("POP_ID", "MARKERS"),
+        names_to = "ALLELES",
+        values_to = "FREQ"
+      ) %>%
       dplyr::mutate(MARKERS_ALLELES = stringi::stri_join(MARKERS, ALLELES, sep = ".")) %>%
       dplyr::select(-MARKERS, -ALLELES) %>%
       dplyr::arrange(MARKERS_ALLELES, POP_ID) %>%
       dplyr::group_by(POP_ID) %>%
-      tidyr::spread(data = ., key = MARKERS_ALLELES, value = FREQ)
+      tidyr::pivot_wider(data = ., names_from = "MARKERS_ALLELES", values_from = "FREQ")
 
     freq.mat <- suppressWarnings(
       freq.wide %>%
-        tibble::remove_rownames(df = .) %>%
+        tibble::remove_rownames(.data = .) %>%
         tibble::column_to_rownames(.data = ., var = "POP_ID") %>%
         as.matrix(.)
     )
@@ -132,7 +138,12 @@ allele_frequencies <- function(data, verbose = TRUE) {
         A2 = stringi::stri_sub(GT, 4,6)
       ) %>%
       dplyr::select(MARKERS, POP_ID, INDIVIDUALS, A1, A2) %>%
-      tidyr::gather(key = ALLELES_GROUP, ALLELES, -c(INDIVIDUALS, POP_ID, MARKERS)) %>%
+      tidyr::pivot_longer(
+        data = .,
+        cols = -c("POP_ID", "INDIVIDUALS", "MARKERS"),
+        names_to = "ALLELES_GROUP",
+        values_to = "ALLELES"
+      ) %>%
       dplyr::group_by(MARKERS, ALLELES, POP_ID) %>%
       dplyr::tally(.) %>%
       dplyr::ungroup(.) %>%
@@ -153,7 +164,7 @@ allele_frequencies <- function(data, verbose = TRUE) {
       dplyr::select(-MARKERS, -ALLELES) %>%
       dplyr::arrange(MARKERS_ALLELES, POP_ID) %>%
       dplyr::group_by(POP_ID) %>%
-      tidyr::spread(data = ., key = MARKERS_ALLELES, value = FREQ)
+      tidyr::pivot_wider(data = ., names_from = "MARKERS_ALLELES", values_from = "FREQ")
 
     freq.mat <- suppressWarnings(
       freq.wide %>%

diff --git a/R/bayescan.R b/R/bayescan.R
@@ -420,7 +420,7 @@ run_bayescan <- function(
     ) %>%
       tidyr::pivot_longer(
         data = .,
-        cols = tidyselect::everything(),
+        cols = dplyr::everything(),
         names_to = "ACCURACY_MARKERS",
         values_to = "N"
       ) %>%

diff --git a/R/betas_estimator.R b/R/betas_estimator.R
@@ -116,7 +116,12 @@ betas_estimator <- function(
         A2 = stringi::stri_sub(GT, 4,6)
       ) %>%
       dplyr::select(MARKERS, POP_ID, INDIVIDUALS, A1, A2) %>%
-      tidyr::gather(data = ., key = ALLELES, value = GT, -c(MARKERS, INDIVIDUALS, POP_ID)) %>%
+      tidyr::pivot_longer(
+        data = .,
+        cols = -c("POP_ID", "INDIVIDUALS", "MARKERS"),
+        names_to = "ALLELES",
+        values_to = "GT"
+      ) %>%
       dplyr::group_by(MARKERS, GT, POP_ID) %>%
       dplyr::tally(.) %>%
       dplyr::ungroup() %>%

diff --git a/R/dart.R b/R/dart.R
@@ -391,16 +391,24 @@ read_dart <- function(
     suppressWarnings(
       data %<>%
         dplyr::select(dplyr::one_of(want)) %>%
-        data.table::as.data.table(x = .) %>%
-        data.table::melt.data.table(
+        tidyr::pivot_longer(
           data = .,
-          id.vars = c("CLONE_ID", "SEQUENCE"),
-          variable.name = "INDIVIDUALS",
-          variable.factor = FALSE,
-          value.name = "VALUE"
-        ) %>%
-        tibble::as_tibble(.)
+          cols = -c("CLONE_ID", "SEQUENCE"),
+          names_to = "INDIVIDUALS",
+          values_to = "VALUE"
+        )
     )
+
+    # data.table::as.data.table(x = .) %>%
+    # data.table::melt.data.table(
+    #   data = .,
+    #   id.vars = c("CLONE_ID", "SEQUENCE"),
+    #   variable.name = "INDIVIDUALS",
+    #   variable.factor = FALSE,
+    #   value.name = "VALUE"
+    # ) %>%
+    # tibble::as_tibble(.)
+
     n.clone <- length(unique(data$CLONE_ID))
     data <- radiator::join_strata(data = data, strata = strata)
 
@@ -1134,7 +1142,7 @@ dart2gds <- function(
 
     alt %<>% dplyr::bind_rows(ref.s) %>%
       dplyr::arrange(VARIANT_ID)
-    ref %<>% dplyr::bind_rows(alt.s)%>%
+    ref %<>% dplyr::bind_rows(alt.s) %>%
       dplyr::arrange(VARIANT_ID)
     alt.s <- ref.s <- NULL
 
@@ -1175,25 +1183,41 @@ dart2gds <- function(
   }
 
   genotypes.meta <- suppressMessages(
-    data.table::as.data.table(alt) %>%
-      data.table::melt.data.table(
-        data = .,
-        id.vars = want,
-        variable.name = "INDIVIDUALS",
-        value.name = "ALLELE_ALT_DEPTH",
-        variable.factor = FALSE) %>%
-      tibble::as_tibble(.) %>%
+    tidyr::pivot_longer(
+      data = alt,
+      cols = -want,
+      names_to = "INDIVIDUALS",
+      values_to = "ALLELE_ALT_DEPTH"
+    ) %>%
       dplyr::bind_cols(
-        data.table::as.data.table(ref) %>%
-          data.table::melt.data.table(
-            data = .,
-            id.vars = c("VARIANT_ID", "MARKERS"),
-            variable.name = "INDIVIDUALS",
-            value.name = "ALLELE_REF_DEPTH",
-            variable.factor = FALSE) %>%
-          tibble::as_tibble(.)
+        tidyr::pivot_longer(
+          data = ref,
+          cols = -c("VARIANT_ID", "MARKERS"),
+          names_to = "INDIVIDUALS",
+          values_to = "ALLELE_REF_DEPTH"
+        )
       )
   )
+
+  #   data.table::as.data.table(alt) %>%
+  #     data.table::melt.data.table(
+  #       data = .,
+  #       id.vars = want,
+  #       variable.name = "INDIVIDUALS",
+  #       value.name = "ALLELE_ALT_DEPTH",
+  #       variable.factor = FALSE) %>%
+  #     tibble::as_tibble(.) %>%
+  #     dplyr::bind_cols(
+  #       data.table::as.data.table(ref) %>%
+  #         data.table::melt.data.table(
+  #           data = .,
+  #           id.vars = c("VARIANT_ID", "MARKERS"),
+  #           variable.name = "INDIVIDUALS",
+  #           value.name = "ALLELE_REF_DEPTH",
+  #           variable.factor = FALSE) %>%
+  #         tibble::as_tibble(.)
+  #     )
+  # )
   ref <- alt <- NULL
 
   # Faster to check that the bind_cols worked by checking the variant id
@@ -1428,7 +1452,12 @@ tidy_dart_metadata <- function(
       COL_TYPE = c("c", "c", "i", "d", "d", "d", "d"))
 
     dart.col.type <- dart.col.type %>%
-      tidyr::gather(data = .,key = DELETE, value = INFO) %>%
+      tidyr::pivot_longer(
+        data = .,
+        cols = dplyr::everything(),
+        names_to = "DELETE",
+        values_to = "INFO"
+      ) %>%
       dplyr::select(-DELETE) %>%
       dplyr::mutate(INFO = stringi::stri_trans_toupper(INFO)) %>%
       dplyr::left_join(want, by = "INFO") %>%

diff --git a/R/detect_biallelic_markers.R b/R/detect_biallelic_markers.R
@@ -159,9 +159,6 @@ detect_biallelic_markers <- function(data, verbose = FALSE, parallel.core = para
             dplyr::filter(MARKERS %in% sampled.markers) %>%
             dplyr::distinct(MARKERS, GT) %>%
             separate_gt(x = ., gt = "GT", sep = 3, exclude = "MARKERS", parallel.core = parallel.core) %>%
-            # dplyr::mutate(A1 = stringi::stri_sub(GT, 1, 3), A2 = stringi::stri_sub(GT, 4,6)) %>%
-            # dplyr::select(-GT) %>%
-            # tidyr::gather(data = ., key = ALLELES_GROUP, value = ALLELES, -MARKERS) %>%
             dplyr::distinct(MARKERS, HAPLOTYPES) %>%
             dplyr::count(x = ., MARKERS) #%>% dplyr::select(n)
         }
@@ -170,8 +167,6 @@ detect_biallelic_markers <- function(data, verbose = FALSE, parallel.core = para
           data <- dplyr::filter(data, GT_VCF != "./.") %>%
             dplyr::filter(MARKERS %in% sampled.markers) %>%
             dplyr::distinct(MARKERS, GT_VCF) %>%
-            # tidyr::separate(data = ., col = GT_VCF, into = c("A1", "A2"), sep = "/") %>%
-            # tidyr::gather(data = ., key = ALLELES_GROUP, value = ALLELES, -MARKERS) %>%
             separate_gt(x = ., gt = "GT_VCF", exclude = "MARKERS", parallel.core = parallel.core) %>%
             dplyr::distinct(MARKERS, HAPLOTYPES) %>% # Here read alleles, not haplotypes
             dplyr::count(x = ., MARKERS) #%>% dplyr::select(n)
@@ -181,8 +176,6 @@ detect_biallelic_markers <- function(data, verbose = FALSE, parallel.core = para
           data <- dplyr::filter(data, GT_VCF_NUC != "./.") %>%
             dplyr::filter(MARKERS %in% sampled.markers) %>%
             dplyr::distinct(MARKERS, GT_VCF_NUC) %>%
-            # tidyr::separate(data = ., col = GT_VCF_NUC, into = c("A1", "A2"), sep = "/") %>%
-            # tidyr::gather(data = ., key = ALLELES_GROUP, value = ALLELES, -MARKERS) %>%
             separate_gt(x = ., exclude = "MARKERS", parallel.core = parallel.core) %>%
             dplyr::distinct(MARKERS, HAPLOTYPES) %>%
             dplyr::count(x = ., MARKERS)

diff --git a/R/detect_biallelic_problems.R b/R/detect_biallelic_problems.R
@@ -155,7 +155,7 @@ detect_biallelic_problems <- function(
       dplyr::mutate(REF = dplyr::if_else(REF == max(REF), "REF", "ALT")) %>%
       dplyr::ungroup(.) %>%
       dplyr::group_by_at(dplyr::vars(c(markers.metadata, "ALLELES", "REF"))) %>%
-      tidyr::spread(data = ., key = POP_ID, value = N) %>%
+      tidyr::pivot_wider(data = ., names_from = "POP_ID", values_from = "N") %>%
       readr::write_tsv(x = ., path = blacklist.info.filename, na = "-")
     res$blacklist.info <- blacklist.info
     dodgy.markers <- dplyr::n_distinct(blacklist.info$MARKERS)

diff --git a/R/detect_duplicate_genomes.R b/R/detect_duplicate_genomes.R
@@ -382,7 +382,6 @@ detect_duplicate_genomes <- function(
             data = ., id.vars = c("MARKERS", "INDIVIDUALS"), variable.name = "ALLELES", value.name = "n",
             variable.factor = FALSE) %>%
           tibble::as_tibble(.) %>%
-          # tidyr::gather(data = ., key = ALLELES, value = n, -c(MARKERS, INDIVIDUALS)) %>%
           dplyr::mutate(MARKERS_ALLELES = stringi::stri_join(MARKERS, ALLELES, sep = ".")) %>%
           dplyr::select(-ALLELES) %>%
           dplyr::arrange(MARKERS_ALLELES, INDIVIDUALS)
@@ -996,7 +995,7 @@ distance_individuals <- function(
           value.var = "n"
         ) %>%
         tibble::as_tibble(.) %>%
-        tibble::remove_rownames(.) %>%
+        tibble::remove_rownames(.data = .) %>%
         tibble::column_to_rownames(.data = ., var = "INDIVIDUALS"))
 
     x <- suppressWarnings(
@@ -1221,8 +1220,12 @@ allele_count <- function(x) {
       A2 = stringi::stri_sub(str = GT, from = 4, to = 6)
     ) %>%
     dplyr::select(-GT) %>%
-    tidyr::gather(
-      data = ., key = ALLELES, value = GT, -c(MARKERS, INDIVIDUALS)) %>%
+    tidyr::pivot_longer(
+      data = .,
+      cols = -c("INDIVIDUALS", "MARKERS"),
+      names_to = "ALLELES",
+      values_to = "GT"
+    ) %>%
     dplyr::arrange(MARKERS, INDIVIDUALS, GT) %>%
     dplyr::count(x = ., INDIVIDUALS, MARKERS, GT) %>%
     dplyr::ungroup(.) %>%

diff --git a/R/detect_het_outliers.R b/R/detect_het_outliers.R
@@ -201,7 +201,12 @@ res$summary.alt.allele <-  dplyr::ungroup(res$outlier.summary$het.summary) %>%
     ONE_HET_ONLY = length(MARKERS[HOM_ALT == 0 & HET == 1]),
     ONE_HOM_ALT_ONE_HET_ONLY = length(MARKERS[HOM_ALT == 1 & HET == 1])
   ) %>%
-  tidyr::gather(data = ., key = MARKERS, value = NUMBERS) %>%
+  tidyr::pivot_longer(
+    data = .,
+    cols = dplyr::everything(),
+    names_to = "MARKERS",
+    values_to = "NUMBERS"
+  ) %>%
   dplyr::mutate(PROPORTION = NUMBERS / (NUMBERS[MARKERS == "TOTAL"]))
 
 # Estimate heterozygotes miscall rate -------------------------------------------
@@ -397,11 +402,21 @@ plot_het_outliers <- function(data, path.folder = NULL) {
   freq.summary <- dplyr::bind_cols(
     res$het.summary %>%
       dplyr::select(MARKERS, POP_ID, HOM_REF = FREQ_HOM_REF_O, HET = FREQ_HET_O, HOM_ALT = FREQ_HOM_ALT_O) %>%
-      tidyr::gather(data = ., key = GENOTYPES, value = OBSERVED, -c(MARKERS, POP_ID)) %>%
+      tidyr::pivot_longer(
+        data = .,
+        cols = -c("POP_ID", "MARKERS"),
+        names_to = "GENOTYPES",
+        values_to = "OBSERVED"
+      ) %>%
       dplyr::arrange(MARKERS, POP_ID),
     res$het.summary %>%
       dplyr::select(MARKERS, POP_ID, HOM_REF = FREQ_HOM_REF_E, HET = FREQ_HET_E, HOM_ALT = FREQ_HOM_ALT_E) %>%
-      tidyr::gather(data = ., key = GENOTYPES, value = EXPECTED, -c(MARKERS, POP_ID)) %>%
+      tidyr::pivot_longer(
+        data = .,
+        cols = -c("POP_ID", "MARKERS"),
+        names_to = "GENOTYPES",
+        values_to = "EXPECTED"
+      ) %>%
       dplyr::arrange(MARKERS, POP_ID) %>%
       dplyr::select(EXPECTED)
   ) %>%
@@ -507,7 +522,7 @@ estimate_m <- function(
 ) {
   D <- dplyr::select(data, INDIVIDUALS, MARKERS, GT_BIN) %>%
     dplyr::group_by(INDIVIDUALS) %>%
-    tidyr::spread(data = ., key = MARKERS, value = GT_BIN) %>%
+    tidyr::pivot_wider(data = ., names_from = "MARKERS", values_from = "GT_BIN") %>%
     dplyr::ungroup(.) %>%
     dplyr::select(-INDIVIDUALS) %>%
     as.matrix(.)