Skip to content

Commit

Permalink
add param sum_to_one to snp_ancestry_summary()
Browse files Browse the repository at this point in the history
  • Loading branch information
privefl committed Aug 13, 2024
1 parent c0f15be commit 1e5495c
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 8 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Encoding: UTF-8
Package: bigsnpr
Type: Package
Title: Analysis of Massive SNP Arrays
Version: 1.12.9
Date: 2024-05-15
Version: 1.12.10
Date: 2024-08-13
Authors@R: c(
person("Florian", "Privé", email = "[email protected]", role = c("aut", "cre")),
person("Michael", "Blum", role = "ths"),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## bigsnpr 1.12.10

- In function `snp_ancestry_summary()`, add parameter `sum_to_one` to optionally allows for ancestry coefficients to have a sum lower than 1 (when `FALSE`; default is `TRUE`).

## bigsnpr 1.12.9

- In function `snp_modifyBuild()`, you can now provide `local_chain` as a vector of two, for when using `check_reverse`. You can now also modify the `base_url` from where to download the chain files.
Expand Down
10 changes: 6 additions & 4 deletions R/ancestry-summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#' Default is 0.4. When correlation is lower, an error is returned.
#' For individual genotypes, this should be larger than 0.6.
#' For allele frequencies, this should be larger than 0.9.
#' @param sum_to_one Whether to force ancestry coefficients to sum to 1?
#' Default is `TRUE` (otherwise, the sum can be lower than 1).
#'
#' @return Vector of coefficients representing the ancestry proportions.
#' Also (as attributes) `cor_each`, the correlation between input
Expand All @@ -27,7 +29,7 @@
#' @example examples/example-ancestry-summary.R
#'
snp_ancestry_summary <- function(freq, info_freq_ref, projection, correction,
min_cor = 0.4) {
min_cor = 0.4, sum_to_one = TRUE) {

assert_package("quadprog")
assert_nona(freq)
Expand All @@ -53,9 +55,9 @@ snp_ancestry_summary <- function(freq, info_freq_ref, projection, correction,
res <- quadprog::solve.QP(
Dmat = cp_X_pd$mat,
dvec = crossprod(y, X),
Amat = cbind(1, diag(ncol(X))),
bvec = c(1, rep(0, ncol(X))),
meq = 1
Amat = cbind(-1, diag(ncol(X))),
bvec = c(-1, rep(0, ncol(X))),
meq = `if`(sum_to_one, 1, 0)
)

cor_pred <- drop(cor(drop(X0 %*% res$solution), freq))
Expand Down
6 changes: 5 additions & 1 deletion docs/news/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/snp_ancestry_summary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions tests/testthat/test-5-match.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,13 @@ test_that("snp_ancestry_summary() works (with no projection here)", {
res <- snp_ancestry_summary(y, X, Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)))
expect_equal(res, prop, check.attributes = FALSE)
expect_equal(attr(res, "cor_pred"), 1)
expect_equal(sum(res), 1)

res.2 <- snp_ancestry_summary(y, X, Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)),
sum_to_one = FALSE)
expect_equal(res.2, prop, check.attributes = FALSE)
expect_equal(attr(res.2, "cor_pred"), 1)
expect_equal(sum(res.2), 1)

expect_warning(res2 <- snp_ancestry_summary(
y, X[, -1], Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X))),
Expand All @@ -156,6 +163,24 @@ test_that("snp_ancestry_summary() works (with no projection here)", {
expect_equal(res2, prop[-1], tolerance = 0.1, check.attributes = FALSE)
expect_equal(attr(res2, "cor_pred"), drop(cor(y, X[, -1] %*% res2)))

expect_warning(res2.2 <- snp_ancestry_summary(
y, X[, -1], Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)), sum_to_one = FALSE),
"The solution does not perfectly match the frequencies.")
expect_true(all(res2.2 > prop[-1]))
expect_equal(res2.2, prop[-1], tolerance = 0.1, check.attributes = FALSE)
expect_equal(attr(res2.2, "cor_pred"), drop(cor(y, X[, -1] %*% res2.2)))
# better than when forced to sum to 1
expect_true(all(res2.2 < res2))
expect_gt(attr(res2.2, "cor_pred"), attr(res2, "cor_pred"))
expect_lt(sum(res2.2), 1)

res.3 <- snp_ancestry_summary(y, cbind(X, rbeta(1000, 1, 3)),
Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)),
sum_to_one = sample(c(TRUE, FALSE), 1))
expect_equal(res.3, c(prop, 0), check.attributes = FALSE)
expect_equal(attr(res.3, "cor_pred"), 1)
expect_equal(sum(res.3), 1)

expect_error(res3 <- snp_ancestry_summary(
y, X[, -3], Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)), min_cor = 0.6),
"Correlation between frequencies is too low:")
Expand Down

0 comments on commit 1e5495c

Please sign in to comment.