Skip to content

Commit

Permalink
* worked on strata section to make the code cleaner
Browse files Browse the repository at this point in the history
* bug fix affecting the ranked method with adegenet ONLY. A section of the code was deleted: I should have kept `if (assignment.analysis == "gsi_sim")`, but it was removed... causing the adegenet results to work their way through the piece of code made for gsi_sim, resulting in a 100% assignment rate!
  • Loading branch information
thierrygosselin committed May 3, 2016
1 parent 4b1786c commit cf0eb1e
Showing 1 changed file with 41 additions and 11 deletions.
52 changes: 41 additions & 11 deletions R/assignment_ngs.R
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ assignment_ngs <- function(data,
}
}


# Create a folder based on filename to save the output files *****************
if (is.null(folder)) {
# Get date and time to have unique filenaming
Expand Down Expand Up @@ -613,7 +614,7 @@ assignment_ngs <- function(data,
data.table = FALSE)

# remove "_" in individual name and replace with "-"
strata.df$INDIVIDUALS <- stri_replace_all_fixed(str = strata.df$INDIVIDUALS, pattern = "_", replacement = "-", vectorize_all = TRUE)
strata.df$INDIVIDUALS <- stri_replace_all_fixed(str = strata.df$INDIVIDUALS, pattern = c("_", ":"), replacement = c("-", "-"), vectorize_all = TRUE)

tped.header.prep <- strata.df %>%
select(INDIVIDUALS) %>%
Expand Down Expand Up @@ -676,7 +677,12 @@ assignment_ngs <- function(data,
# Using the argument strata if provided to replace the current one
if (!is.null(strata)) {
strata.df <- read_tsv(file = strata, col_names = TRUE, col_types = "cc") %>%
rename(POP_ID = STRATA)
rename(POP_ID = STRATA) %>%
mutate(INDIVIDUALS = stri_replace_all_fixed(str = INDIVIDUALS,
pattern = c("_", ":"),
replacement = c("-", "-"),
vectorize_all = TRUE)
)
}

# Make tidy
Expand All @@ -686,7 +692,7 @@ assignment_ngs <- function(data,
mutate(INDIVIDUALS = stri_replace_all_fixed(str = INDIVIDUALS_ALLELES, pattern = c("_A1", "_A2"), replacement = "", vectorize_all = FALSE)) %>%
left_join(strata.df, by = "INDIVIDUALS") %>%
mutate(
POP_ID = factor(POP_ID, levels = pop.levels, ordered =TRUE),
POP_ID = factor(stri_replace_all_fixed(POP_ID, pop.levels, pop.labels, vectorize_all = FALSE), levels = unique(pop.labels), ordered = TRUE),
GT = stri_pad_left(str = GT, width = 3, pad = "0")
)

Expand Down Expand Up @@ -752,7 +758,11 @@ assignment_ngs <- function(data,
tidyr::gather(key = LOCUS, value = GT, -c(INDIVIDUALS, POP_ID)) %>%
mutate(
GT = as.character(GT),
GT = stri_pad_left(str= GT, pad = "0", width = 6)
GT = stri_pad_left(str= GT, pad = "0", width = 6),
INDIVIDUALS = stri_replace_all_fixed(str = INDIVIDUALS,
pattern = c("_", ":"),
replacement = c("-", "-"),
vectorize_all = TRUE)
)


Expand Down Expand Up @@ -781,7 +791,13 @@ assignment_ngs <- function(data,
distinct(INDIVIDUALS)
} else {
strata.df <- read_tsv(file = strata, col_names = TRUE, col_types = "cc") %>%
rename(POP_ID = STRATA)
rename(POP_ID = STRATA) %>%
mutate(
INDIVIDUALS = stri_replace_all_fixed(str = INDIVIDUALS,
pattern = c("_", ":"),
replacement = c("-", "-"),
vectorize_all = TRUE)
)

input <- input %>%
mutate(INDIVIDUALS = as.character(INDIVIDUALS)) %>%
Expand Down Expand Up @@ -812,7 +828,13 @@ assignment_ngs <- function(data,
select(-Cnt) %>%
rename(LOCUS = `Catalog ID`) %>%
tidyr::gather(INDIVIDUALS, GT, -LOCUS) %>%
mutate(LOCUS = as.character(LOCUS))
mutate(
LOCUS = as.character(LOCUS),
INDIVIDUALS = stri_replace_all_fixed(str = INDIVIDUALS,
pattern = c("_", ":"),
replacement = c("-", "-"),
vectorize_all = TRUE)
)

# Filter with whitelist of markers
if (!is.null(whitelist.markers)) {
Expand All @@ -836,7 +858,13 @@ assignment_ngs <- function(data,
)
} else { # Make population ready with the strata provided
strata.df <- read_tsv(file = strata, col_names = TRUE, col_types = "cc") %>%
rename(POP_ID = STRATA)
rename(POP_ID = STRATA) %>%
mutate(
INDIVIDUALS = stri_replace_all_fixed(str = INDIVIDUALS,
pattern = c("_", ":"),
replacement = c("-", "-"),
vectorize_all = TRUE)
)

input <- input %>%
mutate(INDIVIDUALS = as.character(INDIVIDUALS)) %>%
Expand Down Expand Up @@ -2552,7 +2580,7 @@ Progress can be monitored with activity in the folder...")
}
}
write_tsv(x = assignment.res, path = paste0(directory.subsample,filename.assignment.res), col_names = TRUE, append = FALSE)
} else {
} else { # with adegenet
if (is.null(subsample)) {
if (imputation.method == FALSE) {
filename.assignment.res <- stri_join("assignment", sampling.method, "no.imputation", "results", "iterations", "tsv", sep = ".")
Expand Down Expand Up @@ -3038,7 +3066,8 @@ Progress can be monitored with activity in the folder...")
# thl != 1 or "all"
# summary stats
if (assignment.analysis == "adegenet") {
assignment.res.summary <- assignment.res.summary %>%
assignment.res.summary.prep <- assignment.res.summary %>%
# assignment.res.summary <- assignment.res.summary %>%
group_by(CURRENT, INFERRED, ITERATIONS, MARKER_NUMBER, MISSING_DATA, METHOD) %>%
tally %>%
group_by(CURRENT) %>%
Expand All @@ -3049,7 +3078,8 @@ Progress can be monitored with activity in the folder...")
select(-n, -TOTAL)
}

assignment.res.summary.prep <- assignment.res.summary %>%
if (assignment.analysis == "gsi_sim") {
assignment.res.summary.prep <- assignment.res.summary %>%
group_by(CURRENT, MARKER_NUMBER, METHOD, MISSING_DATA, ITERATIONS) %>%
summarise(
n = length(CURRENT[as.character(CURRENT) == as.character(INFERRED)]),
Expand All @@ -3058,6 +3088,7 @@ Progress can be monitored with activity in the folder...")
ungroup() %>%
mutate(ASSIGNMENT_PERC = round(n/TOTAL*100, 0)) %>%
select(-n, -TOTAL)
}

if (is.null(subsample)) {
if (imputation.method == FALSE) {
Expand Down Expand Up @@ -3294,4 +3325,3 @@ Progress can be monitored with activity in the folder...")
res.list <- list(assignment = res, plot.assignment = plot.assignment)
return(res.list)
} # End assignment_ngs

0 comments on commit cf0eb1e

Please sign in to comment.