diff --git a/UserManual/src/chapter_BNP.Rmd b/UserManual/src/chapter_BNP.Rmd index 4463a2885..ecf5941fe 100644 --- a/UserManual/src/chapter_BNP.Rmd +++ b/UserManual/src/chapter_BNP.Rmd @@ -13,25 +13,27 @@ require(nimble) ## Bayesian nonparametric mixture models {#sec:bnpmixtures} -NIMBLE provides support for Bayesian nonparametric (BNP) mixture modeling. The current implementation provides support for hierarchical specifications involving Dirichlet process (DP) mixtures [@ferguson_73;@ferguson_74;@lo_84;@escobar_94;@escobar_west_95]. More specifically, a DP mixture model takes the form +NIMBLE provides support for Bayesian nonparametric (BNP) mixture modeling. The current implementation provides support for hierarchical specifications involving Dirichlet process (DP) mixtures [@ferguson_73;@ferguson_74;@lo_84;@escobar_94;@escobar_west_95]. These allow one to avoid specifying a particular parametric distribution for a given node (parameter) in a model. Instead one can use a DP mixture as a much more general, nonparametric distribution. For example, a normal distribution for a random effect could be replaced by a DP mixture of normal distributions, with the number of components of the mixture being determined from the data and not fixed in advance. + +We'll first introduce the general, technical definition of a DP mixture model before describing the Chinese Restaurant Process representation, which may be more interpretable for many readers. More specifically, a DP mixture model for a random variable $y_i$ takes the form $$y_i \mid G \overset{iid}{\sim} \int h(y_i \mid \theta) G(d\theta),$$ $$G \mid \alpha, G_0 \sim DP(\alpha, G_0),$$ -where $h(\cdot \mid \theta)$ is a suitable kernel with parameter $\theta$, and $\alpha$ and $G_0$ are the concentration and baseline distribution parameters of the DP, respectively. DP mixture models can be written with different levels of hierarchy, all being equivalent to the model above. +where $h(\cdot \mid \theta)$ is a suitable kernel (i.e., probability density/mass function) with parameter $\theta$, and $\alpha$ and $G_0$ are the concentration and baseline distribution parameters of the DP, respectively. DP mixture models can be written with different levels of hierarchy, all being equivalent to the model above. While "y" would often be used as notation for a data value, it is used generically here, noting that often DP mixtures are used for random effects rather than directly for observations. -When the random measure $G$ is integrated out from the model, the DP mixture model can be written using latent or membership variables, $z_i$, following a Chinese Restaurant Process (CRP) distribution [@blackwell_mcqueen_73], discussed in Section \@ref(sec:crp). The model takes the form +When the random distribution (also referred to as a random 'measure') $G$ is integrated out from the model, the DP mixture model can be written using latent or membership variables, $z_i$, following a Chinese Restaurant Process (CRP) distribution [@blackwell_mcqueen_73], discussed in Section \@ref(sec:crp). The model takes the form $$y_i \mid \tilde{\boldsymbol{\theta}}, z_i \overset{ind}{\sim} h(\cdot \mid \tilde{\theta}_{z_i}),$$ $$\boldsymbol{z}\mid \alpha \sim \mbox{CRP}(\alpha),\hspace{0.5cm} \tilde{\theta}_j \overset{iid}{\sim}G_0,$$ -where $\mbox{CRP}(\alpha)$ denotes the CRP distribution with concentration parameter $\alpha$. +where $\mbox{CRP}(\alpha)$ denotes the CRP distribution with concentration parameter $\alpha$. Put in perhaps more intuitive terms, $z_i$ says which cluster/group the $i$th unit is in, and the parameter $\tilde{\theta}_j$ for group $j$ is distributed according to the $G_0$ baseline distribution. The parameter $\alpha$ controls how dispersed the clustering is, described more in the next section. -If a stick-breaking representation [@sethuraman_94], discussed in section \@ref(sec:sb), is assumed for the random measure $G$, then the model takes the form +If a stick-breaking representation [@sethuraman_94], discussed in section \@ref(sec:sb), is assumed for the random distribution (measure) $G$, then the model takes the form $$y_i \mid {\boldsymbol{\theta}}^{\star}, \boldsymbol{v} \overset{ind}{\sim} \sum_{l=1}^{\infty}\left\{ v_l\prod_{m= 3 && length(expr[[1]]) == 1 && + as.character(expr[[1]]) %in% c("=", "<-", "<<-") && + length(expr[[3]]) > 1 && length(expr[[3]][[1]]) == 1 && + as.character(expr[[3]][[1]]) %in% c('derivs', 'nimDerivs')) +} + +checkNestedCalcCall <- function(functionName, methodsWithCalc, methodsDerivsOf) { + if(functionName %in% methodsWithCalc) return(TRUE) + if(functionName %in% names(methodsDerivsOf)) + return(any(sapply(methodsDerivsOf[[functionName]], checkNestedCalcCall, + methodsWithCalc, methodsDerivsOf))) + return(FALSE) +} + +nf_checkDSLcode_calcDerivsArgs <- function(code, methodsWithCalc, methodsDerivsOf) { + code <- body(code) + derivsFound <- which(findDerivsCalls(code)) + for(idx in derivsFound) { + argNames <- names(code[[idx]][[3]]) + call <- code[[idx]][[3]][[2]][[1]] + if(length(call) == 1 && checkNestedCalcCall(as.character(call), methodsWithCalc, methodsDerivsOf) && + length(setdiff(c('model', 'constantNodes', 'updateNodes'), argNames))) + messageIfVerbose(" [Warning] Detected use of `nimDerivs` on a function or method, `", code[[idx]][[3]][[2]][[1]], "`,\n", + " that appears to contain the use of `calculate` on a model.\n", + " If model calculations are done in the method being differentiated, the 'model'\n", + " argument to 'nimDerivs' should be included to ensure correct restoration of\n", + " values in the model, and the 'updateNodes' and 'constantNodes' arguments\n", + " should also be provided (see Section 16.7.2 of the User Manual).") + } + invisible(NULL) +} + + nf_checkDSLcode <- function(code, methodNames, setupVarNames, args, where = NULL) { validCalls <- c(names(sizeCalls), otherDSLcalls, diff --git a/packages/nimble/R/nimbleFunction_Rderivs.R b/packages/nimble/R/nimbleFunction_Rderivs.R index b8864bde7..d99ec5284 100644 --- a/packages/nimble/R/nimbleFunction_Rderivs.R +++ b/packages/nimble/R/nimbleFunction_Rderivs.R @@ -25,16 +25,25 @@ nimDerivs_dummy <- nimbleFunction( #' @param order an integer vector with values within the set \eqn{{0, 1, 2}}, #' corresponding to whether the function value, Jacobian, and Hessian should be #' returned respectively. Defaults to \code{c(0, 1, 2)}. -#' @param model (optional) for derivatives of a nimbleFunction that involves model. -#' calculations, the uncompiled model that is used. This is needed in order +#' @param model (optional) the uncompiled model that is used, if taking derivatives +#' of a nimbleFunction that involves model calculations. This is needed in order #' to be able to correctly restore values into the model when \code{order} does not -#' include 0 (or in all cases when double-taping). +#' include 0 (or in all cases when double-taping). IMPORTANT: if \code{model} +#' is included, one should also include the arguments \code{updateNodes} and +#' \code{constantNodes} using the output obtained from running +#' \code{makeModelDerivsInfo}. #' @param ... additional arguments intended for internal use only. #' #'@details Derivatives for uncompiled nimbleFunctions are calculated using the #' \code{numDeriv} package. If this package is not installed, an error will #' be issued. Derivatives for matrix valued arguments will be returned in #' column-major order. +#' +#' As discussed above with the \code{model} argument, if taking derivatives +#' of a nimbleFunction that involves model calculations (rather than directly +#' taking derivatives of `calculate`), care needs to be taken to provide +#' \code{model}, \code{updateNodes}, and \code{calcNodes} arguments. See +#' Section 16.7.2 of the User Manual for more details. #' #' @return an \code{ADNimbleList} with elements \code{value}, \code{jacobian}, #' and \code{hessian}. @@ -694,15 +703,7 @@ nimDerivs_nf <- function(call = NA, if(e$restoreInfo$deepestDepth < e$restoreInfo$currentDepth) e$restoreInfo$deepestDepth <- e$restoreInfo$currentDepth } - } else { # partial check for whether there is a model in the nimbleFunction - if(is(derivFxn, 'refMethodDef') && is.nf(e$.self)) { - isModel <- sapply(names(e), function(x) is.model(e[[x]])) - if(any(isModel)) { - modelElement <- names(e)[which(isModel)] - warning("nimDerivs_nf: detected a model, ", paste(modelElement, collapse = ','), ", associated with the nimbleFunction whose method is being differentiated. If model calculations are done in the method being differentiated, the 'model' argument to 'nimDerivs' should be included to ensure correct restoration of values in the model.") - } - } - } + } ## standardize the derivFxnCall arguments derivFxnCall <- match.call(derivFxn, derivFxnCall) diff --git a/packages/nimble/R/nimbleFunction_core.R b/packages/nimble/R/nimbleFunction_core.R index db68b14ac..381053857 100644 --- a/packages/nimble/R/nimbleFunction_core.R +++ b/packages/nimble/R/nimbleFunction_core.R @@ -85,10 +85,25 @@ nimbleFunction <- function(setup = NULL, force(where) # so that we can get to namespace where a nf is defined by using topenv(parent.frame(2)) in getNimbleFunctionEnvironment() if(is.logical(setup)) if(setup) setup <- function() {} else setup <- NULL + ## Check for correct entries in `buildDerivs` separately from `nfMethodRC$new()` because ## that only has access to `thisBuildDerivs`, and we need to check if `buildDerivs` is set ## for the method on which `nimDerivs` is called. tmp <- sapply(c(list(run = run), methods), nf_checkDSLcode_buildDerivs, buildDerivs) + + ## Check that if a model calculate is in the code of `run` or another method on + ## which `derivs` is called, that the `model`, `updateNodes`,and `constantNodes` + ## arguments are provided. + if(getNimbleOption('checkDerivsArgs') && length(buildDerivs)) { + allMethods <- c(list(run = run), methods) + if(is.character(buildDerivs)) nms <- buildDerivs else nms <- names(buildDerivs) + methodsWithCalc <- sapply(allMethods[nms], nf_checkDSLcode_checkForCalc) + methodsWithCalc <- nms[methodsWithCalc] + methodsDerivsOf <- sapply(allMethods, nf_checkDSLcode_checkDerivsOf) + methodsDerivsOf <- methodsDerivsOf[!sapply(methodsDerivsOf, is.null)] + if(length(methodsWithCalc)) + tmp <- sapply(c(list(run = run), methods), nf_checkDSLcode_calcDerivsArgs, methodsWithCalc, methodsDerivsOf) + } if(is.null(setup)) { if(length(methods) > 0) stop('Cannot provide multiple methods if there is no setup function. Use "setup = function(){}" or "setup = TRUE" if you need a setup function that does not do anything', call. = FALSE) diff --git a/packages/nimble/R/options.R b/packages/nimble/R/options.R index c017a653b..e6388c584 100644 --- a/packages/nimble/R/options.R +++ b/packages/nimble/R/options.R @@ -226,7 +226,8 @@ nimOptimMethod("bobyqa", useOldcWiseRule = FALSE, # This is a safety toggle for one change in sizeBinaryCwise, 1/24/23. After a while we can remove this. stripUnusedTypeDefs = TRUE, digits = NULL, - enableVirtualNodeFunctionDefs = FALSE + enableVirtualNodeFunctionDefs = FALSE, + checkDerivsArgs = TRUE ) ) diff --git a/packages/nimble/tests/testthat/test-ADerrorTrapping.R b/packages/nimble/tests/testthat/test-ADerrorTrapping.R index 1e9e52bb1..74ac7bf29 100644 --- a/packages/nimble/tests/testthat/test-ADerrorTrapping.R +++ b/packages/nimble/tests/testthat/test-ADerrorTrapping.R @@ -327,3 +327,220 @@ test_that("Incorrect use of buildDerivs=TRUE in nimbleFunction with setup.", { ) }) +test_that("Warning message works for use of nimDerivs with model calculate and incorrect args", { + expect_silent( + mynf <- nimbleFunction( + setup = function(model){ + paramNodes = 'psi[1:4]' + }, + run = function(x = double(1), alpha=double(1)) { + returnType(double(0)) + inds <- 1:length(x) + + tmp <- derivs(dens_calc(x), inds, order = c(0,1), model = model, constantNodes = "", updateNodes = "") + + ## Do AD on ddirch directly. This works. + tmp <- derivs(dens_direct(x, alpha), inds, order = c(0,1)) + + ## Do AD on model$calculate. This works. + tmp <- derivs(model$calculate(paramNodes), wrt = paramNodes, order = c(0,1)) + + return(dens_calc(x)) + }, + methods = list( + ## This mimics calcPrior_p in nimbleQuad + dens_calc = function(x = double(1)) { + values(model, paramNodes) <<- x + result <- model$calculate(paramNodes) + returnType(double(0)) + return(result) + }, + dens_direct = function(x = double(1), alpha=double(1)) { + result <- ddirch(x, alpha, log = TRUE) + calculate <- 7 + returnType(double(0)) + return(result) + } + ), + buildDerivs = c('dens_calc','dens_direct') + )) + + expect_message( + mynf <- nimbleFunction( + setup = function(model){ + paramNodes = 'psi[1:4]' + }, + run = function(x = double(1), alpha=double(1)) { + returnType(double(0)) + inds <- 1:length(x) + + tmp = derivs(dens_calc(x), inds, order = c(0,1), constantNodes = "", updateNodes = "") + + return(dens_calc(x)) + }, + methods = list( + ## This mimics calcPrior_p in nimbleQuad + dens_calc = function(x = double(1)) { + values(model, paramNodes) <<- x + result <- model$calculate(paramNodes) + returnType(double(0)) + return(result) + }, + dens_direct = function(x = double(1), alpha=double(1)) { + result <- ddirch(x, alpha, log = TRUE) + calculate <- 7 + returnType(double(0)) + return(result) + } + ), + buildDerivs = c('dens_calc','dens_direct') + ), "appears to contain the use of `calculate` on a model") + + expect_message( + mynf <- nimbleFunction( + setup = function(model){ + paramNodes = 'psi[1:4]' + }, + run = function(x = double(1), alpha=double(1)) { + returnType(double(0)) + inds <- 1:length(x) + + tmp <- derivs(dens_calc(x), inds, order = c(0,1), model = model) + + return(dens_calc(x)) + }, + methods = list( + ## This mimics calcPrior_p in nimbleQuad + dens_calc = function(x = double(1)) { + values(model, paramNodes) <<- x + result <- model$calculate(paramNodes) + returnType(double(0)) + return(result) + }, + dens_direct = function(x = double(1), alpha=double(1)) { + result <- ddirch(x, alpha, log = TRUE) + calculate <- 7 + returnType(double(0)) + return(result) + } + ), + buildDerivs = c('dens_calc','dens_direct') + ), "appears to contain the use of `calculate` on a model") +}) + + + + + +test_that("Warning message works for use of nimDerivs with nested model calculate and incorrect args", { + expect_silent( + mynf <- nimbleFunction( + setup = function(model){ + paramNodes = 'psi[1:4]' + }, + run = function(x = double(1), alpha=double(1)) { + }, + methods = list( + inner_logLik = function(reTransform = double(1)) { + values(model, randomEffectsNodes) <<- reTransform + ans <- model$calculate(innerCalcNodes) + return(ans) + returnType(double()) + }, + gr_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(inner_logLik(reTransform), wrt = re_indices_inner, order = 1, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$jacobian[1,]) + returnType(double(1)) + }, + ## Double taping for efficiency + he_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 1, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$jacobian) + returnType(double(2)) + }, + he_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 0, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$value) + returnType(double(2)) + } + ), buildDerivs = c('inner_logLik','gr_inner_logLik_internal','he_inner_logLik_internal') + ) + ) + + expect_message( + mynf <- nimbleFunction( + setup = function(model){ + paramNodes = 'psi[1:4]' + }, + run = function(x = double(1), alpha=double(1)) { + }, + methods = list( + inner_logLik = function(reTransform = double(1)) { + values(model, randomEffectsNodes) <<- reTransform + ans <- model$calculate(innerCalcNodes) + return(ans) + returnType(double()) + }, + gr_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(inner_logLik(reTransform), wrt = re_indices_inner, order = 1, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$jacobian[1,]) + returnType(double(1)) + }, + he_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 1, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$jacobian) + returnType(double(2)) + }, + he_inner_logLik = function(reTransform = double(1)) { + ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 0, model = model, + constantNodes = inner_constantNodes) # Missing `updateNodes`. + return(ans$value) + returnType(double(2)) + } + ), + buildDerivs = c('inner_logLik','gr_inner_logLik_internal','he_inner_logLik_internal') + ), "appears to contain the use of `calculate` on a model") + + expect_message( + mynf <- nimbleFunction( + setup = function(model){ + paramNodes = 'psi[1:4]' + }, + run = function(x = double(1), alpha=double(1)) { + }, + methods = list( + inner_logLik = function(reTransform = double(1)) { + values(model, randomEffectsNodes) <<- reTransform + ans <- model$calculate(innerCalcNodes) + return(ans) + returnType(double()) + }, + gr_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(inner_logLik(reTransform), wrt = re_indices_inner, order = 1, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$jacobian[1,]) + returnType(double(1)) + }, + he_inner_logLik_internal = function(reTransform = double(1)) { + ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 1, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) # Missing `model`. + return(ans$jacobian) + returnType(double(2)) + }, + he_inner_logLik = function(reTransform = double(1)) { + ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 0, model = model, + updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) + return(ans$value) + returnType(double(2)) + } + ), + buildDerivs = c('inner_logLik','gr_inner_logLik_internal','he_inner_logLik_internal') + ), "appears to contain the use of `calculate` on a model") +}) + +