Skip to content

Commit

Permalink
# radiator 1.1.7 2020-08-21
Browse files Browse the repository at this point in the history
* PLINK files: fix a couple of bugs reading the tped
* removed `tidyr::gather` and `tidyr::spread` dependencies (they are deprecated)
* DArT data in 1-row format was not working properly with latest `data.table` melt function.
Changed to `tidyr::pivot_long`.
  • Loading branch information
thierrygosselin committed Aug 21, 2020
1 parent 6c08e36 commit 232317c
Show file tree
Hide file tree
Showing 296 changed files with 811 additions and 2,445 deletions.
Binary file added .DS_Store
Binary file not shown.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: radiator
Type: Package
Title: RADseq Data Exploration, Manipulation and Visualization using R
Version: 1.1.6
Date: 2020-06-23
Version: 1.1.7
Date: 2020-08-21
Encoding: UTF-8
Authors@R: c(
person("Thierry", "Gosselin", email = "[email protected]", role = c("aut", "cre")),
Expand Down Expand Up @@ -46,7 +46,7 @@ Suggests:
stringdist,
stringi,
tibble,
tidyr (>= 1.0.0),
tidyr (>= 1.1.0),
utils
License: GPL-3
LazyLoad: yes
Expand Down
5 changes: 0 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ export(detect_mixed_genomes)
export(detect_paralogs)
export(detect_ref_genome)
export(diagnostic_maf)
export(discard_monomorphic_markers)
export(distance2tibble)
export(distance_individuals)
export(erase_genotypes)
Expand All @@ -63,19 +62,16 @@ export(extract_markers_metadata)
export(filter_blacklist_genotypes)
export(filter_common_markers)
export(filter_coverage)
export(filter_dart)
export(filter_dart_reproducibility)
export(filter_fis)
export(filter_genotyping)
export(filter_het)
export(filter_hwe)
export(filter_individual)
export(filter_individuals)
export(filter_ld)
export(filter_mac)
export(filter_maf)
export(filter_monomorphic)
export(filter_population)
export(filter_rad)
export(filter_snp_number)
export(filter_snp_position_read)
Expand Down Expand Up @@ -105,7 +101,6 @@ export(ind_total_reads)
export(individuals2strata)
export(integrate_ref)
export(join_strata)
export(keep_common_markers)
export(ld2df)
export(ld_boxplot)
export(ld_missing)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# radiator 1.1.7 2020-08-21

* PLINK files: fix a couple of bugs reading the tped
* removed `tidyr::gather` and `tidyr::spread` dependencies (they are deprecated)
* DArT data in 1-row format was not working properly with latest `data.table` melt function.
Changed to `tidyr::pivot_long`.


# radiator 1.1.6 2020-06-23

* updated radiator so that it work with latest release of SeqArray (v.1.28.1), that
Expand Down
23 changes: 17 additions & 6 deletions R/allele_frequencies.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ allele_frequencies <- function(data, verbose = TRUE) {
maf <- maf.data
maf.data <- NULL
maf.local.wide <- dplyr::select(.data = maf, MARKERS, POP_ID, MAF_LOCAL) %>%
tidyr::spread(data = ., key = MARKERS, value = MAF_LOCAL)
tidyr::pivot_wider(data = ., names_from = "MARKERS", values_from = "MAF_LOCAL")


maf.global <- dplyr::distinct(.data = maf, MARKERS, MAF_GLOBAL)

Expand All @@ -104,16 +105,21 @@ allele_frequencies <- function(data, verbose = TRUE) {

freq.wide <- dplyr::ungroup(freq) %>%
dplyr::select(MARKERS, POP_ID, REF = FREQ_REF, ALT = MAF_LOCAL) %>%
tidyr::gather(data = ., key = ALLELES, value = FREQ, -c(MARKERS, POP_ID)) %>%
tidyr::pivot_longer(
data = .,
cols = -c("POP_ID", "MARKERS"),
names_to = "ALLELES",
values_to = "FREQ"
) %>%
dplyr::mutate(MARKERS_ALLELES = stringi::stri_join(MARKERS, ALLELES, sep = ".")) %>%
dplyr::select(-MARKERS, -ALLELES) %>%
dplyr::arrange(MARKERS_ALLELES, POP_ID) %>%
dplyr::group_by(POP_ID) %>%
tidyr::spread(data = ., key = MARKERS_ALLELES, value = FREQ)
tidyr::pivot_wider(data = ., names_from = "MARKERS_ALLELES", values_from = "FREQ")

freq.mat <- suppressWarnings(
freq.wide %>%
tibble::remove_rownames(df = .) %>%
tibble::remove_rownames(.data = .) %>%
tibble::column_to_rownames(.data = ., var = "POP_ID") %>%
as.matrix(.)
)
Expand All @@ -132,7 +138,12 @@ allele_frequencies <- function(data, verbose = TRUE) {
A2 = stringi::stri_sub(GT, 4,6)
) %>%
dplyr::select(MARKERS, POP_ID, INDIVIDUALS, A1, A2) %>%
tidyr::gather(key = ALLELES_GROUP, ALLELES, -c(INDIVIDUALS, POP_ID, MARKERS)) %>%
tidyr::pivot_longer(
data = .,
cols = -c("POP_ID", "INDIVIDUALS", "MARKERS"),
names_to = "ALLELES_GROUP",
values_to = "ALLELES"
) %>%
dplyr::group_by(MARKERS, ALLELES, POP_ID) %>%
dplyr::tally(.) %>%
dplyr::ungroup(.) %>%
Expand All @@ -153,7 +164,7 @@ allele_frequencies <- function(data, verbose = TRUE) {
dplyr::select(-MARKERS, -ALLELES) %>%
dplyr::arrange(MARKERS_ALLELES, POP_ID) %>%
dplyr::group_by(POP_ID) %>%
tidyr::spread(data = ., key = MARKERS_ALLELES, value = FREQ)
tidyr::pivot_wider(data = ., names_from = "MARKERS_ALLELES", values_from = "FREQ")

freq.mat <- suppressWarnings(
freq.wide %>%
Expand Down
2 changes: 1 addition & 1 deletion R/bayescan.R
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ run_bayescan <- function(
) %>%
tidyr::pivot_longer(
data = .,
cols = tidyselect::everything(),
cols = dplyr::everything(),
names_to = "ACCURACY_MARKERS",
values_to = "N"
) %>%
Expand Down
7 changes: 6 additions & 1 deletion R/betas_estimator.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,12 @@ betas_estimator <- function(
A2 = stringi::stri_sub(GT, 4,6)
) %>%
dplyr::select(MARKERS, POP_ID, INDIVIDUALS, A1, A2) %>%
tidyr::gather(data = ., key = ALLELES, value = GT, -c(MARKERS, INDIVIDUALS, POP_ID)) %>%
tidyr::pivot_longer(
data = .,
cols = -c("POP_ID", "INDIVIDUALS", "MARKERS"),
names_to = "ALLELES",
values_to = "GT"
) %>%
dplyr::group_by(MARKERS, GT, POP_ID) %>%
dplyr::tally(.) %>%
dplyr::ungroup() %>%
Expand Down
81 changes: 55 additions & 26 deletions R/dart.R
Original file line number Diff line number Diff line change
Expand Up @@ -391,16 +391,24 @@ read_dart <- function(
suppressWarnings(
data %<>%
dplyr::select(dplyr::one_of(want)) %>%
data.table::as.data.table(x = .) %>%
data.table::melt.data.table(
tidyr::pivot_longer(
data = .,
id.vars = c("CLONE_ID", "SEQUENCE"),
variable.name = "INDIVIDUALS",
variable.factor = FALSE,
value.name = "VALUE"
) %>%
tibble::as_tibble(.)
cols = -c("CLONE_ID", "SEQUENCE"),
names_to = "INDIVIDUALS",
values_to = "VALUE"
)
)

# data.table::as.data.table(x = .) %>%
# data.table::melt.data.table(
# data = .,
# id.vars = c("CLONE_ID", "SEQUENCE"),
# variable.name = "INDIVIDUALS",
# variable.factor = FALSE,
# value.name = "VALUE"
# ) %>%
# tibble::as_tibble(.)

n.clone <- length(unique(data$CLONE_ID))
data <- radiator::join_strata(data = data, strata = strata)

Expand Down Expand Up @@ -1134,7 +1142,7 @@ dart2gds <- function(

alt %<>% dplyr::bind_rows(ref.s) %>%
dplyr::arrange(VARIANT_ID)
ref %<>% dplyr::bind_rows(alt.s)%>%
ref %<>% dplyr::bind_rows(alt.s) %>%
dplyr::arrange(VARIANT_ID)
alt.s <- ref.s <- NULL

Expand Down Expand Up @@ -1175,25 +1183,41 @@ dart2gds <- function(
}

genotypes.meta <- suppressMessages(
data.table::as.data.table(alt) %>%
data.table::melt.data.table(
data = .,
id.vars = want,
variable.name = "INDIVIDUALS",
value.name = "ALLELE_ALT_DEPTH",
variable.factor = FALSE) %>%
tibble::as_tibble(.) %>%
tidyr::pivot_longer(
data = alt,
cols = -want,
names_to = "INDIVIDUALS",
values_to = "ALLELE_ALT_DEPTH"
) %>%
dplyr::bind_cols(
data.table::as.data.table(ref) %>%
data.table::melt.data.table(
data = .,
id.vars = c("VARIANT_ID", "MARKERS"),
variable.name = "INDIVIDUALS",
value.name = "ALLELE_REF_DEPTH",
variable.factor = FALSE) %>%
tibble::as_tibble(.)
tidyr::pivot_longer(
data = ref,
cols = -c("VARIANT_ID", "MARKERS"),
names_to = "INDIVIDUALS",
values_to = "ALLELE_REF_DEPTH"
)
)
)

# data.table::as.data.table(alt) %>%
# data.table::melt.data.table(
# data = .,
# id.vars = want,
# variable.name = "INDIVIDUALS",
# value.name = "ALLELE_ALT_DEPTH",
# variable.factor = FALSE) %>%
# tibble::as_tibble(.) %>%
# dplyr::bind_cols(
# data.table::as.data.table(ref) %>%
# data.table::melt.data.table(
# data = .,
# id.vars = c("VARIANT_ID", "MARKERS"),
# variable.name = "INDIVIDUALS",
# value.name = "ALLELE_REF_DEPTH",
# variable.factor = FALSE) %>%
# tibble::as_tibble(.)
# )
# )
ref <- alt <- NULL

# Faster to check that the bind_cols worked by checking the variant id
Expand Down Expand Up @@ -1428,7 +1452,12 @@ tidy_dart_metadata <- function(
COL_TYPE = c("c", "c", "i", "d", "d", "d", "d"))

dart.col.type <- dart.col.type %>%
tidyr::gather(data = .,key = DELETE, value = INFO) %>%
tidyr::pivot_longer(
data = .,
cols = dplyr::everything(),
names_to = "DELETE",
values_to = "INFO"
) %>%
dplyr::select(-DELETE) %>%
dplyr::mutate(INFO = stringi::stri_trans_toupper(INFO)) %>%
dplyr::left_join(want, by = "INFO") %>%
Expand Down
7 changes: 0 additions & 7 deletions R/detect_biallelic_markers.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,6 @@ detect_biallelic_markers <- function(data, verbose = FALSE, parallel.core = para
dplyr::filter(MARKERS %in% sampled.markers) %>%
dplyr::distinct(MARKERS, GT) %>%
separate_gt(x = ., gt = "GT", sep = 3, exclude = "MARKERS", parallel.core = parallel.core) %>%
# dplyr::mutate(A1 = stringi::stri_sub(GT, 1, 3), A2 = stringi::stri_sub(GT, 4,6)) %>%
# dplyr::select(-GT) %>%
# tidyr::gather(data = ., key = ALLELES_GROUP, value = ALLELES, -MARKERS) %>%
dplyr::distinct(MARKERS, HAPLOTYPES) %>%
dplyr::count(x = ., MARKERS) #%>% dplyr::select(n)
}
Expand All @@ -170,8 +167,6 @@ detect_biallelic_markers <- function(data, verbose = FALSE, parallel.core = para
data <- dplyr::filter(data, GT_VCF != "./.") %>%
dplyr::filter(MARKERS %in% sampled.markers) %>%
dplyr::distinct(MARKERS, GT_VCF) %>%
# tidyr::separate(data = ., col = GT_VCF, into = c("A1", "A2"), sep = "/") %>%
# tidyr::gather(data = ., key = ALLELES_GROUP, value = ALLELES, -MARKERS) %>%
separate_gt(x = ., gt = "GT_VCF", exclude = "MARKERS", parallel.core = parallel.core) %>%
dplyr::distinct(MARKERS, HAPLOTYPES) %>% # Here read alleles, not haplotypes
dplyr::count(x = ., MARKERS) #%>% dplyr::select(n)
Expand All @@ -181,8 +176,6 @@ detect_biallelic_markers <- function(data, verbose = FALSE, parallel.core = para
data <- dplyr::filter(data, GT_VCF_NUC != "./.") %>%
dplyr::filter(MARKERS %in% sampled.markers) %>%
dplyr::distinct(MARKERS, GT_VCF_NUC) %>%
# tidyr::separate(data = ., col = GT_VCF_NUC, into = c("A1", "A2"), sep = "/") %>%
# tidyr::gather(data = ., key = ALLELES_GROUP, value = ALLELES, -MARKERS) %>%
separate_gt(x = ., exclude = "MARKERS", parallel.core = parallel.core) %>%
dplyr::distinct(MARKERS, HAPLOTYPES) %>%
dplyr::count(x = ., MARKERS)
Expand Down
2 changes: 1 addition & 1 deletion R/detect_biallelic_problems.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ detect_biallelic_problems <- function(
dplyr::mutate(REF = dplyr::if_else(REF == max(REF), "REF", "ALT")) %>%
dplyr::ungroup(.) %>%
dplyr::group_by_at(dplyr::vars(c(markers.metadata, "ALLELES", "REF"))) %>%
tidyr::spread(data = ., key = POP_ID, value = N) %>%
tidyr::pivot_wider(data = ., names_from = "POP_ID", values_from = "N") %>%
readr::write_tsv(x = ., path = blacklist.info.filename, na = "-")
res$blacklist.info <- blacklist.info
dodgy.markers <- dplyr::n_distinct(blacklist.info$MARKERS)
Expand Down
11 changes: 7 additions & 4 deletions R/detect_duplicate_genomes.R
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,6 @@ detect_duplicate_genomes <- function(
data = ., id.vars = c("MARKERS", "INDIVIDUALS"), variable.name = "ALLELES", value.name = "n",
variable.factor = FALSE) %>%
tibble::as_tibble(.) %>%
# tidyr::gather(data = ., key = ALLELES, value = n, -c(MARKERS, INDIVIDUALS)) %>%
dplyr::mutate(MARKERS_ALLELES = stringi::stri_join(MARKERS, ALLELES, sep = ".")) %>%
dplyr::select(-ALLELES) %>%
dplyr::arrange(MARKERS_ALLELES, INDIVIDUALS)
Expand Down Expand Up @@ -996,7 +995,7 @@ distance_individuals <- function(
value.var = "n"
) %>%
tibble::as_tibble(.) %>%
tibble::remove_rownames(.) %>%
tibble::remove_rownames(.data = .) %>%
tibble::column_to_rownames(.data = ., var = "INDIVIDUALS"))

x <- suppressWarnings(
Expand Down Expand Up @@ -1221,8 +1220,12 @@ allele_count <- function(x) {
A2 = stringi::stri_sub(str = GT, from = 4, to = 6)
) %>%
dplyr::select(-GT) %>%
tidyr::gather(
data = ., key = ALLELES, value = GT, -c(MARKERS, INDIVIDUALS)) %>%
tidyr::pivot_longer(
data = .,
cols = -c("INDIVIDUALS", "MARKERS"),
names_to = "ALLELES",
values_to = "GT"
) %>%
dplyr::arrange(MARKERS, INDIVIDUALS, GT) %>%
dplyr::count(x = ., INDIVIDUALS, MARKERS, GT) %>%
dplyr::ungroup(.) %>%
Expand Down
23 changes: 19 additions & 4 deletions R/detect_het_outliers.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,12 @@ res$summary.alt.allele <- dplyr::ungroup(res$outlier.summary$het.summary) %>%
ONE_HET_ONLY = length(MARKERS[HOM_ALT == 0 & HET == 1]),
ONE_HOM_ALT_ONE_HET_ONLY = length(MARKERS[HOM_ALT == 1 & HET == 1])
) %>%
tidyr::gather(data = ., key = MARKERS, value = NUMBERS) %>%
tidyr::pivot_longer(
data = .,
cols = dplyr::everything(),
names_to = "MARKERS",
values_to = "NUMBERS"
) %>%
dplyr::mutate(PROPORTION = NUMBERS / (NUMBERS[MARKERS == "TOTAL"]))

# Estimate heterozygotes miscall rate -------------------------------------------
Expand Down Expand Up @@ -397,11 +402,21 @@ plot_het_outliers <- function(data, path.folder = NULL) {
freq.summary <- dplyr::bind_cols(
res$het.summary %>%
dplyr::select(MARKERS, POP_ID, HOM_REF = FREQ_HOM_REF_O, HET = FREQ_HET_O, HOM_ALT = FREQ_HOM_ALT_O) %>%
tidyr::gather(data = ., key = GENOTYPES, value = OBSERVED, -c(MARKERS, POP_ID)) %>%
tidyr::pivot_longer(
data = .,
cols = -c("POP_ID", "MARKERS"),
names_to = "GENOTYPES",
values_to = "OBSERVED"
) %>%
dplyr::arrange(MARKERS, POP_ID),
res$het.summary %>%
dplyr::select(MARKERS, POP_ID, HOM_REF = FREQ_HOM_REF_E, HET = FREQ_HET_E, HOM_ALT = FREQ_HOM_ALT_E) %>%
tidyr::gather(data = ., key = GENOTYPES, value = EXPECTED, -c(MARKERS, POP_ID)) %>%
tidyr::pivot_longer(
data = .,
cols = -c("POP_ID", "MARKERS"),
names_to = "GENOTYPES",
values_to = "EXPECTED"
) %>%
dplyr::arrange(MARKERS, POP_ID) %>%
dplyr::select(EXPECTED)
) %>%
Expand Down Expand Up @@ -507,7 +522,7 @@ estimate_m <- function(
) {
D <- dplyr::select(data, INDIVIDUALS, MARKERS, GT_BIN) %>%
dplyr::group_by(INDIVIDUALS) %>%
tidyr::spread(data = ., key = MARKERS, value = GT_BIN) %>%
tidyr::pivot_wider(data = ., names_from = "MARKERS", values_from = "GT_BIN") %>%
dplyr::ungroup(.) %>%
dplyr::select(-INDIVIDUALS) %>%
as.matrix(.)
Expand Down
Loading

0 comments on commit 232317c

Please sign in to comment.