Skip to content

Commit

Permalink
small update with mod that detect ref genome better
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrygosselin committed Mar 14, 2019
1 parent ee5088a commit 1ae4a4e
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 22 deletions.
2 changes: 2 additions & 0 deletions R/detect_ref_genome.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ detect_ref_genome <- function(chromosome = NULL, data = NULL, verbose = TRUE) {
# if the chrom.unique > 1 more likely not to be de novo assembly (e.g. with old stacks version)
chrom.unique <- length(unique(ref.genome)) == 1
chrom.unique.radiator <- any(unique(ref.genome) == "CHROM_1")
chrom.unique.stacks <- any(unique(ref.genome) == "un")

# presence of underscore or other separator: more likely ref genome
chrom.sep <- TRUE %in%
Expand All @@ -112,6 +113,7 @@ detect_ref_genome <- function(chromosome = NULL, data = NULL, verbose = TRUE) {
ref.genome <- FALSE
}
if (chrom.unique.radiator) ref.genome <- FALSE
if (chrom.unique.stacks) ref.genome <- FALSE
}
}
chrom.unique <- chrom.alpha <- chrom.sep <- chrom.unique.radiator <- NULL
Expand Down
13 changes: 8 additions & 5 deletions R/filter_genotyping.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,18 @@ filter_genotyping <- function(
.f = mis_many_markers,
x = info),
BLACKLISTED_MARKERS = n.markers - WHITELISTED_MARKERS
) %>%
readr::write_tsv(
x = .,
path = file.path(path.folder, "genotyping.helper.table.tsv"))
)

readr::write_tsv(
x = helper.table,
path = file.path(path.folder, "genotyping.helper.table.tsv")
)

# checking if strata present
strata <- extract_individuals(
gds = data,
ind.field.select = c("INDIVIDUALS", "STRATA"))
ind.field.select = c("INDIVIDUALS", "STRATA"),
whitelist = TRUE)
if (!is.null(strata$STRATA)) {
m.strata <- missing_per_pop(
gds = data, strata = strata, parallel.core = parallel.core)
Expand Down
6 changes: 6 additions & 0 deletions R/filter_snp_position_read.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ filter_snp_position_read <- function(
want <- c("MARKERS", "CHROM", "LOCUS", "POS", "COL")
if (data.type == "SeqVarGDSClass") {
wl <- bl <- extract_markers_metadata(gds = data, whitelist = TRUE)

# check for presence of COL info...
if (!rlang::has_name(wl, "COL")) {
message("COL info required, returning data")
return(data)
}
if (!is.numeric(wl$COL)) wl$COL <- as.numeric(wl$COL)
} else {
wl <- bl <- dplyr::select(data, dplyr::one_of(want))
Expand Down
68 changes: 51 additions & 17 deletions R/vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ read_vcf <- function(
f = path.folder,
rad.folder = "import_gds",
prefix_int = TRUE,
internal = FALSE,
internal = internal,
file.date = file.date,
verbose = verbose)

Expand Down Expand Up @@ -419,16 +419,16 @@ read_vcf <- function(
readr::write_lines(x = random.seed, path = file.path(radiator.folder, "random.seed"))
if (verbose) message("File written: random.seed (", random.seed,")")

# radiator_parameters: generate --------------------------------------------
filters.parameters <- radiator_parameters(
generate = TRUE,
initiate = FALSE,
update = FALSE,
parameter.obj = parameters,
path.folder = radiator.folder,
file.date = file.date,
verbose = verbose,
internal = internal)
# # radiator_parameters: generate --------------------------------------------
# filters.parameters <- radiator_parameters(
# generate = TRUE,
# initiate = FALSE,
# update = FALSE,
# parameter.obj = parameters,
# path.folder = radiator.folder,
# file.date = file.date,
# verbose = verbose,
# internal = internal)

# VCF: Read ------------------------------------------------------------------
timing.vcf <- proc.time()
Expand Down Expand Up @@ -484,10 +484,6 @@ read_vcf <- function(
# Generate radiator skeleton -------------------------------------------------
if (verbose) message("\nAnalyzing the data...")
radiator.gds <- radiator_gds_skeleton(gds)
# Blacklist of individuals: generate
# bl.i <- update_bl_individuals(gds = gds, generate = TRUE)
# Blacklist of markers: generate
# bl.gds <- update_bl_markers(gds = gds, generate = TRUE)

# VCF: source ----------------------------------------------------------------
update_radiator_gds(gds = gds, node.name = "source", value = source)
Expand Down Expand Up @@ -525,9 +521,11 @@ read_vcf <- function(
pop.levels = pop.levels,
pop.labels = pop.labels,
pop.select = pop.select,
blacklist.id = blacklist.id) %$% strata
blacklist.id = blacklist.id,
keep.two = FALSE) %$% strata

if (!is.null(strata)) {
id.levels <- individuals$INDIVIDUALS
individuals %<>%
dplyr::left_join(
join_strata(individuals, strata, verbose = verbose) %>%
Expand All @@ -540,9 +538,32 @@ read_vcf <- function(
if (nrow(bl) != 0) {
if (verbose) message(" Number of sample blacklisted by the stata: ", nrow(bl))
}

# renaming ?
if (rlang::has_name(individuals, "NEW_ID")) {
if (!identical(id.levels, individuals$INDIVIDUALS)) {
rlang::abort("Wrong id order, contact author")
}
# replace id in GDS
update_radiator_gds(
gds = gds,
radiator.gds = FALSE,
node.name = "sample.id",
value = individuals$NEW_ID,
replace = TRUE)

individuals %<>% dplyr::rename(INDIVIDUALS = NEW_ID, OLD_ID = INDIVIDUALS)

}


} else {
individuals %<>% dplyr::mutate(STRATA = 1L, FILTERS = "whitelist")
}




strata <- generate_strata(data = dplyr::filter(individuals, FILTERS == "whitelist"))
#Update GDS node
update_radiator_gds(gds = gds, node.name = "individuals", value = individuals, sync = TRUE)
Expand Down Expand Up @@ -626,6 +647,17 @@ read_vcf <- function(
replace = TRUE
)

# # radiator_parameters: generate --------------------------------------------
filters.parameters <- radiator_parameters(
generate = TRUE,
initiate = FALSE,
update = FALSE,
parameter.obj = parameters,
path.folder = radiator.folder,
file.date = file.date,
verbose = verbose,
internal = internal)

# radiator_parameters: initiate --------------------------------------------
# with original VCF's values
filters.parameters <- radiator_parameters(
Expand All @@ -639,7 +671,9 @@ read_vcf <- function(
values = "",
path.folder = path.folder,
file.date = file.date,
verbose = FALSE)
internal = internal,
verbose = verbose)


##______________________________________________________________________######
# Filters --------------------------------------------------------------------
Expand Down

0 comments on commit 1ae4a4e

Please sign in to comment.