Skip to content

Commit

Permalink
consistency in output files naming in assignment_ngs
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrygosselin committed Apr 15, 2016
1 parent 4a1b9a4 commit b57cb7e
Show file tree
Hide file tree
Showing 11 changed files with 1,344 additions and 201 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.Rproj.user
.Rhistory
.RData
inst/doc
40 changes: 20 additions & 20 deletions R/assignment_mixture.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,13 @@
#' Note that for other file types, use the stackr package for haplotype files and
#' create a whitelist; for PLINK and data frames, use the PLINK linkage
#' disequilibrium based SNP pruning option.
#' @param common.markers (optional) Logical. Default = \code{FALSE}.
#' With \code{TRUE}, will keep markers genotyped in all the populations.
#' @param common.markers (optional) Logical. Default: \code{common.markers = TRUE}.
#' With \code{TRUE}, only the markers in common (genotyped) between all the
#' baseline samples (populations) are kept.


#' @param maf.thresholds (string, double, optional) String with
#' local/populations and global/overall maf thresholds, respectively.
#' Default: \code{maf.thresholds = NULL}.
#' local/populations and global/overall Minor Allele Frequency (maf) thresholds, respectively.
#' Default: \code{maf.thresholds = NULL}. The maf is calculated on the baseline samples only.
#' e.g. \code{maf.thresholds = c(0.05, 0.1)} for a local maf threshold
#' of 0.05 and a global threshold of 0.1. Available for VCF, PLINK and data frame
#' files. Use stackr for haplotype files.
Expand Down Expand Up @@ -402,7 +402,7 @@ assignment_mixture <- function(data,
if (missing(monomorphic.out)) monomorphic.out <- TRUE # remove monomorphic
if (missing(blacklist.genotype)) blacklist.genotype <- NULL # no genotype to erase
if (missing(snp.ld)) snp.ld <- NULL
if (missing(common.markers)) common.markers <- FALSE
if (missing(common.markers)) common.markers <- TRUE
if (missing(maf.thresholds)) maf.thresholds <- NULL
if (missing(maf.pop.num.threshold)) maf.pop.num.threshold <- 1
if (missing(maf.approach)) maf.approach <- "SNP"
Expand Down Expand Up @@ -1082,7 +1082,8 @@ haplotype file and create a whitelist, for other file type, use
# This needs to be moved while doing the assignment
if (common.markers == TRUE) { # keep only markers present in all pop
message("Using markers common in all populations:")
pop.number <- input %>%
pop.number <- input %>%
filter(!INDIVIDUALS %in% mixture.df$INDIVIDUALS) %>%
select(POP_ID) %>%
filter(POP_ID != "mixture")

Expand Down Expand Up @@ -1563,7 +1564,7 @@ package and update your whitelist")
filter(!INDIVIDUALS %in% mixture.df$INDIVIDUALS) %>%
mutate(POP_ID = droplevels(POP_ID))

strata.df.subsample <- input.prep %>%
strata.df.impute <- input.prep %>%
select(INDIVIDUALS, POP_ID) %>%
distinct(INDIVIDUALS, POP_ID)

Expand Down Expand Up @@ -1638,15 +1639,15 @@ package and update your whitelist")
input.imp <- impute_genotype_rf(input.imp) # impute globally
input.imp <- plyr::colwise(as.character, exclude = NA)(input.imp)
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS) %>%
ungroup()
)
}

if (impute.mixture == FALSE) {
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS) %>%
ungroup()
)
Expand All @@ -1671,7 +1672,6 @@ package and update your whitelist")
input.imp <- plyr::colwise(as.character, exclude = NA)(input.imp)
input.prep <- NULL # remove unused object


# combine the mixture (no imputation) + the imputed baseline
input.imp <- suppressWarnings(
bind_rows(input.imp,
Expand All @@ -1687,15 +1687,15 @@ package and update your whitelist")
input.imp <- impute_genotype_rf(input.imp) # impute globally
input.imp <- plyr::colwise(as.character, exclude = NA)(input.imp)
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS) %>%
ungroup()
)
}

if (impute.mixture == FALSE) {
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS) %>%
ungroup()
)
Expand Down Expand Up @@ -1774,15 +1774,15 @@ package and update your whitelist")
)

input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS) %>%
ungroup()
)
}

if (impute.mixture == FALSE) {
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS) %>%
mutate(
GT = stri_replace_na(GT, replacement = "000000")
Expand Down Expand Up @@ -1829,15 +1829,15 @@ package and update your whitelist")
)

input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS, ALLELES) %>%
ungroup()
)
}

if (impute.mixture == FALSE) {
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS) %>%
mutate(
GT = stri_replace_na(GT, replacement = "000")
Expand Down Expand Up @@ -1887,15 +1887,15 @@ package and update your whitelist")
)

input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS) %>%
ungroup()
)
}

if (impute.mixture == FALSE) {
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS) %>%
mutate(
GT = stri_replace_na(GT, replacement = "000000")
Expand Down Expand Up @@ -1934,14 +1934,14 @@ package and update your whitelist")
)

input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS, ALLELES) %>%
ungroup()
)
}
if (impute.mixture == FALSE) {
input.imp <- suppressWarnings(
left_join(strata.df.subsample, input.imp, by = "INDIVIDUALS") %>%
left_join(strata.df.impute, input.imp, by = "INDIVIDUALS") %>%
arrange(POP_ID, INDIVIDUALS, MARKERS) %>%
mutate(
GT = stri_replace_na(GT, replacement = "000")
Expand Down
Loading

0 comments on commit b57cb7e

Please sign in to comment.