From e7fb7ebedc919914bdd70b347f624639bf0d936d Mon Sep 17 00:00:00 2001 From: Patrick Breheny Date: Tue, 26 Feb 2019 15:08:48 -0600 Subject: [PATCH] Version 3.2-1 --- DESCRIPTION | 6 +++--- NEWS | 8 +++++++- R/G.R | 2 +- R/cv-grpreg.R | 25 +++++++++++++++++-------- R/{cv.grpsurv.R => cv-grpsurv.R} | 16 +++++++++++++++- README.md | 4 ++-- inst/tests/cv.R | 32 +++++++++++++++----------------- 7 files changed, 60 insertions(+), 33 deletions(-) rename R/{cv.grpsurv.R => cv-grpsurv.R} (82%) diff --git a/DESCRIPTION b/DESCRIPTION index e41bb61..0e3a071 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: grpreg Title: Regularization Paths for Regression Models with Grouped Covariates -Version: 3.2-0 -Date: 2018-09-27 +Version: 3.2-1 +Date: 2019-02-26 Authors@R: c( person("Patrick", "Breheny", role=c("aut","cre"), email="patrick-breheny@uiowa.edu", comment=c(ORCID="000-0002-0650-1119")), person("Yaohui", "Zeng", role="ctb")) @@ -18,4 +18,4 @@ BugReports: http://github.com/pbreheny/grpreg/issues License: GPL-3 URL: http://pbreheny.github.io/grpreg, https://github.com/pbreheny/grpreg LazyData: TRUE -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 diff --git a/NEWS b/NEWS index f3d8e5d..ef19708 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,10 @@ -# grpreg 3.2-0 (2018-XX-XX) +# grpreg 3.2-1 (2019-02-26) + * Change: Cross-validation now balances censoring across folds for survival + models + * Fixed: Leave-one-out cross-validation now works correctly for logistic + regression + +# grpreg 3.2-0 (2018-09-27) * New: cv.grpsurv now calculates SE, with bootstrap option * Change: R^2 now consistently uses the Cox-Snell definition for all types of models diff --git a/R/G.R b/R/G.R index 118961b..666fb77 100644 --- a/R/G.R +++ b/R/G.R @@ -16,7 +16,7 @@ setupG <- function(group, m, bilevel) { } else { #if (all.equal(sort(names(m)), sort(group))) TRY <- try(as.integer(group)==g) - if (class(TRY)=='try-error' || !TRY) stop('Attempting to set group.multiplier is ambiguous if group is not a factor') + if (class(TRY)=='try-error' || any(!TRY)) stop('Attempting to set group.multiplier is ambiguous if group is not a factor') if (length(m) != length(lev)) stop("Length of group.multiplier must equal number of penalized groups") if (storage.mode(m) != "double") storage.mode(m) <- "double" } diff --git a/R/cv-grpreg.R b/R/cv-grpreg.R index 8752c7d..aec3ee2 100644 --- a/R/cv-grpreg.R +++ b/R/cv-grpreg.R @@ -19,23 +19,32 @@ cv.grpreg <- function(X, y, group=1:ncol(X), ..., nfolds=10, seed, fold, returnY # Set up folds if (!missing(seed)) set.seed(seed) n <- length(y) + + if (missing(fold)) { if (m > 1) { nn <- n/m - fold <- rep(ceiling(sample(1:nn)/nn*nfolds), each=m) - } else if (fit$family=="binomial" & (min(table(y)) > nfolds)) { + fold_ <- sample(1:nn %% (nfolds)) + fold_[fold_==0] <- nfolds + fold <- rep(fold_, each=m) + } else if (fit$family=="binomial") { ind1 <- which(y==1) ind0 <- which(y==0) n1 <- length(ind1) n0 <- length(ind0) - fold1 <- ceiling(sample(1:n1)/n1*nfolds) - fold0 <- ceiling(sample(1:n0)/n0*nfolds) + fold1 <- 1:n1 %% nfolds + fold0 <- (n1 + 1:n0) %% nfolds + fold1[fold1==0] <- nfolds + fold0[fold0==0] <- nfolds fold <- numeric(n) - fold[y==1] <- fold1 - fold[y==0] <- fold0 + fold[y==1] <- sample(fold1) + fold[y==0] <- sample(fold0) } else { - fold <- ceiling(sample(1:n)/n*nfolds) + fold <- sample(1:n %% nfolds) + fold[fold==0] <- nfolds } + } else { + nfolds <- max(fold) } # Do cross-validation @@ -81,7 +90,7 @@ cvf <- function(i, X, y, fold, cv.args) { X2 <- X[fold==i, , drop=FALSE] y2 <- y[fold==i] - yhat <- predict(fit.i, X2, type="response") + yhat <- matrix(predict(fit.i, X2, type="response"), length(y2)) loss <- loss.grpreg(y2, yhat, fit.i$family) pe <- if (fit.i$family=="binomial") {(yhat < 0.5) == y2} else NULL list(loss=loss, pe=pe, nl=length(fit.i$lambda), yhat=yhat) diff --git a/R/cv.grpsurv.R b/R/cv-grpsurv.R similarity index 82% rename from R/cv.grpsurv.R rename to R/cv-grpsurv.R index 9ebd827..ff52094 100644 --- a/R/cv.grpsurv.R +++ b/R/cv-grpsurv.R @@ -18,7 +18,21 @@ cv.grpsurv <- function(X, y, group, ..., nfolds=10, seed, fold, se=c('quick', 'b # Set up folds n <- nrow(X) if (!missing(seed)) set.seed(seed) - if (missing(fold)) fold <- ceiling(sample(1:n)/n*nfolds) + if (missing(fold)) { + ind1 <- which(fit$fail==1) + ind0 <- which(fit$fail==0) + n1 <- length(ind1) + n0 <- length(ind0) + fold1 <- 1:n1 %% nfolds + fold0 <- (n1 + 1:n0) %% nfolds + fold1[fold1==0] <- nfolds + fold0[fold0==0] <- nfolds + fold <- numeric(n) + fold[fit$fail==1] <- sample(fold1) + fold[fit$fail==0] <- sample(fold0) + } else { + nfolds <- max(fold) + } Y <- matrix(NA, nrow=n, ncol=length(fit$lambda)) cv.args <- list(...) diff --git a/README.md b/README.md index 17f2cb5..012aba0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -![version](http://www.r-pkg.org/badges/version/grpreg) -![downloads](http://cranlogs.r-pkg.org/badges/grpreg) +[![version](http://www.r-pkg.org/badges/version/grpreg)](https://cran.r-project.org/package=grpreg) +[![downloads](http://cranlogs.r-pkg.org/badges/grpreg)](https://cran.r-project.org/package=grpreg) [![codecov.io](https://codecov.io/github/pbreheny/grpreg/coverage.svg?branch=master)](https://codecov.io/github/pbreheny/grpreg?branch=master) # Regularization Paths for Regression Models with Grouped Covariates diff --git a/inst/tests/cv.R b/inst/tests/cv.R index 9f97a88..834e8ef 100644 --- a/inst/tests/cv.R +++ b/inst/tests/cv.R @@ -6,8 +6,8 @@ X <- matrix(rnorm(n*p),ncol=p) y <- rnorm(n) cvfit <- cv.grpreg(X, y, group, penalty='grLasso') cvfit <- cv.grpreg(X, y, group, penalty='gel') -cvfit <- cv.grpreg(X, y, group, penalty='grLasso', nfolds=50) -cvfit <- cv.grpreg(X, y, group, penalty='gel', nfolds=50) +cvfit <- cv.grpreg(X, y, group, penalty='grLasso', fold=1:50) +cvfit <- cv.grpreg(X, y, group, penalty='gel', fold=1:50) .test = "Cross-validation: binomial" n <- 50 @@ -17,8 +17,8 @@ X <- matrix(rnorm(n*p),ncol=p) y <- runif(n) > 0.5 cvfit <- cv.grpreg(X, y, group, family='binomial', penalty='grLasso') cvfit <- cv.grpreg(X, y, group, family='binomial', penalty='gel') -cvfit <- cv.grpreg(X, y, group, family='binomial', penalty='grLasso', nfolds=50) -cvfit <- cv.grpreg(X, y, group, family='binomial', penalty='gel', nfolds=50) +cvfit <- cv.grpreg(X, y, group, family='binomial', penalty='grLasso', fold=1:50) +cvfit <- cv.grpreg(X, y, group, family='binomial', penalty='gel', fold=1:50) .test = "Cross-validation: poisson" n <- 50 @@ -28,22 +28,20 @@ X <- matrix(rnorm(n*p),ncol=p) y <- sample(1:n) cvfit <- cv.grpreg(X, y, group, family='poisson', penalty='grLasso') cvfit <- cv.grpreg(X, y, group, family='poisson', penalty='gel') -cvfit <- cv.grpreg(X, y, group, family='poisson', penalty='grLasso', nfolds=50) -cvfit <- cv.grpreg(X, y, group, family='poisson', penalty='gel', nfolds=50) +cvfit <- cv.grpreg(X, y, group, family='poisson', penalty='grLasso', fold=1:50) +cvfit <- cv.grpreg(X, y, group, family='poisson', penalty='gel', fold=1:50) .test = "Cross-validation: multitask learning" n <- 50 -group <- rep(0:4,5:1) -p <- length(group) -X <- matrix(rnorm(n*p),ncol=p) -y <- rnorm(n) -cvfit <- cv.grpreg(X, y, group, returnY=TRUE) -cve <- apply(cvfit$Y - y, 2, crossprod)/n -check(cve, cvfit$cve, tol= .001) -y <- rnorm(n) > 0 -cvfit <- cv.grpreg(X, y, group, family='binomial', returnY=TRUE, lambda.min=0.5) -pe <- apply((cvfit$Y>0.5)!=y, 2, mean) -check(pe, cvfit$pe, tol= .001) +p <- 10 +m <- 4 +X <- matrix(rnorm(n*p), ncol=p) +Y <- matrix(rnorm(n*m), ncol=m) +cvfit <- cv.grpreg(X, Y) +cvfit <- cv.grpreg(X, Y, nfolds=50) +Y <- matrix(rnorm(n*m), ncol=m) > 0 +cvfit <- cv.grpreg(X, Y, family='binomial') +cvfit <- cv.grpreg(X, Y, family='binomial', nfolds=50) .test = "Cross-validation: p > n" n <- 75