Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Imports:
R6 (>= 2.4.1)
License: GPL-3
Encoding: UTF-8
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
URL: https://github.com/MarselScheer/bootGOF
BugReports: https://github.com/MarselScheer/bootGOF/issues
Suggests:
Expand Down
20 changes: 16 additions & 4 deletions R/GOF_model.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
library(parallel)

##' @title Convenience function for creating a GOF-test for statistical models
##'
##' @description Simplifies the creation of an instance of
Expand All @@ -18,6 +20,13 @@
##' class used for performing the GOF test (\link{GOF_model_test})
##' is injected. This parameter simply makes it easier to test the
##' convenience function properly.
##' @param n_cores positive integer specifying the number of CPU cores to use
##' for parallel resampling. If greater than 1, the L'Ecuyer-CMRG RNG is
##' used; if \code{NULL} or 1, one core is used with the current RNG.
##' Default is \code{NULL}.
##' @param seed integer used to seed the internally set-up
##' L'Ecuyer-CMRG RNG; it is also applied when the RNG is not replaced.
##' Default is \code{NULL}, which leaves the seed unchanged.
##' @export
##' @return instance of \link{GOF_model_test}
##' @examples
Expand Down Expand Up @@ -54,7 +63,9 @@ GOF_model <- function(model, # nolint
y_name,
Rn1_statistic, # nolint
gof_model_resample_class = GOF_model_resample,
gof_model_test_class = GOF_model_test
gof_model_test_class = GOF_model_test,
n_cores = NULL,
seed = NULL
) {
checkmate::assert_subset(
x = simulator_type,
Expand All @@ -67,8 +78,6 @@ GOF_model <- function(model, # nolint
))
}



simulators <- list(
lm = list(
parametric = GOF_lm_sim_param,
Expand Down Expand Up @@ -113,6 +122,9 @@ GOF_model <- function(model, # nolint
y_name = y_name,
Rn1_statistic = Rn1_statistic,
gof_model_info_extractor = mie,
gof_model_resample = model_resample)
gof_model_resample = model_resample,
n_cores = n_cores,
seed = seed)

return(ret)
}
44 changes: 42 additions & 2 deletions R/GOF_model_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,33 @@ GOF_model_test <- R6::R6Class( # nolint
##' @param gof_model_resample an instance that implements
##' \link{GOF_model_resample} in order to apply it to
##' \code{model}
##' @param n_cores positive integer specifying the number of CPU cores to
##' use for parallel resampling. If greater than 1, the L'Ecuyer-CMRG RNG
##' is used; if \code{NULL} or 1, one core is used with the current RNG.
##' @param seed integer used to seed the internally set-up
##' L'Ecuyer-CMRG RNG. A non-\code{NULL} seed is also applied when the
##' RNG is not replaced.
##' @return An instance of the Class
initialize = function(model,
data,
nmb_boot_samples,
y_name,
Rn1_statistic, # nolint
gof_model_info_extractor,
gof_model_resample) {
gof_model_resample,
n_cores,
seed) {
checkmate::assert_count(x = nmb_boot_samples, positive = TRUE)
checkmate::assert_count(x = n_cores, positive = TRUE, null.ok = TRUE)
private$model_org <- model
private$data_org <- data
private$y_name <- y_name
private$Rn1_statistic <- Rn1_statistic # nolint
private$nmb_boot_samples <- nmb_boot_samples
private$model_info_extractor <- gof_model_info_extractor
private$model_resample <- gof_model_resample
private$n_cores <- n_cores
private$seed <- seed
private$order_beta_dot_X_org <- order( # nolint
private$model_info_extractor$beta_x_covariates(
model = private$model_org
Expand Down Expand Up @@ -84,6 +95,8 @@ GOF_model_test <- R6::R6Class( # nolint
nmb_boot_samples = NULL,
model_info_extractor = NULL,
model_resample = NULL,
n_cores = NULL,
seed = NULL,
Rn1_statistic = NULL,
Rn1_boot = NULL,
Rn1_org = NULL,
Expand All @@ -108,5 +121,32 @@ GOF_model_test <- R6::R6Class( # nolint
order_beta_x_covariates = private$order_beta_dot_X_org)
return(Rn1_boot)
}
private$Rn1_boot <- lapply(X = 1:private$nmb_boot_samples, FUN = f) # nolint

# Replace RNG with "L'Ecuyer-CMRG" if going parallel
replaced_rng <- FALSE
if (is.null(private$n_cores)) {
private$n_cores <- 1
} else if (private$n_cores > 1) {
# save and replace current RNG state
original_state <- if (exists(".Random.seed", .GlobalEnv))
.GlobalEnv$.Random.seed else NULL # nolint
RNGkind("L'Ecuyer-CMRG")
set.seed(NULL)
replaced_rng <- TRUE
}

if (!is.null(private$seed)) {
set.seed(private$seed)
}

private$Rn1_boot <- parallel::mclapply(X = 1:private$nmb_boot_samples, FUN = f, mc.cores = private$n_cores) # nolint

# Reset initial RNG if it has been replaced
if (replaced_rng) {
if (!is.null(original_state)) {
.GlobalEnv$.Random.seed <- original_state
} else {
RNGkind("default")
}
}
}))
38 changes: 38 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,44 @@ mt$get_pvalue()

...

## Parallelization

The bootstrapping process can be accelerated using the `n_cores` parameter of
the `GOF_model` function, which specifies the number of CPU cores to use.

If `n_cores` is set to at least 2, the RNG currently in use is internally
replaced by the L'Ecuyer-CMRG generator, which is safe to use
in a parallel context.

This internal generator can be seeded using the `seed` parameter of the
`GOF_model` function. Note that `seed` is also applied when the `n_cores`
parameter is not used and the generator is therefore not replaced.

For example:
```{r}
set.seed(1)
N <- 100
X1 <- rnorm(N)
X2 <- rnorm(N)
d <- data.frame(
y = rpois(n = N, lambda = exp(4 + X1 * 2 + X2 * 6)),
x1 = X1,
x2 = X2)

fit <- glm(y ~ x1 + x2, data = d, family = poisson())

mt <- GOF_model(
model = fit,
data = d,
nmb_boot_samples = 100,
simulator_type = "parametric",
y_name = "y",
Rn1_statistic = Rn1_KS$new(),
n_cores = 2,
seed = 1)
mt$get_pvalue()
```

## Installation

You can install it from CRAN
Expand Down
55 changes: 47 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,45 @@ not be rejected by the GOF-test:


## Parallelization

The bootstrapping process can be accelerated using the `n_cores`
parameter of the `GOF_model` function, which specifies the number of CPU
cores to use.

If `n_cores` is set to at least 2, the RNG currently in use is
internally replaced by the L’Ecuyer-CMRG generator, which is safe to
use in a parallel context.

This internal generator can be seeded using the `seed` parameter of the
`GOF_model` function. Note that `seed` is also applied when the
`n_cores` parameter is not used and the generator is therefore not
replaced.

For example:

set.seed(1)
N <- 100
X1 <- rnorm(N)
X2 <- rnorm(N)
d <- data.frame(
y = rpois(n = N, lambda = exp(4 + X1 * 2 + X2 * 6)),
x1 = X1,
x2 = X2)

fit <- glm(y ~ x1 + x2, data = d, family = poisson())

mt <- GOF_model(
model = fit,
data = d,
nmb_boot_samples = 100,
simulator_type = "parametric",
y_name = "y",
Rn1_statistic = Rn1_KS$new(),
n_cores = 2,
seed = 1)
mt$get_pvalue()
#> [1] 0.62

## Installation

You can install it from CRAN
Expand All @@ -89,9 +128,9 @@ package in your environment by calling:
# sessionInfo

sessionInfo()
#> R Under development (unstable) (2025-08-19 r88650)
#> R Under development (unstable) (2025-10-19 r88945)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> Running under: Ubuntu 24.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
Expand All @@ -109,15 +148,15 @@ package in your environment by calling:
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices datasets utils methods base
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] bootGOF_0.1.1
#> [1] bootGOF_0.1.1.9000
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.37 desc_1.4.3 backports_1.5.0 R6_2.6.1
#> [5] fastmap_1.2.0 xfun_0.53 knitr_1.50 htmltools_0.5.8.1
#> [9] rmarkdown_2.29 cli_3.6.5 renv_1.1.5 withr_3.0.2
#> [13] pkgload_1.4.0 compiler_4.6.0 rprojroot_2.1.0 tools_4.6.0
#> [17] pkgbuild_1.4.8 checkmate_2.3.3 evaluate_1.0.4 yaml_2.3.10
#> [21] rlang_1.1.6
#> [9] rmarkdown_2.30 cli_3.6.5 pkgload_1.4.1 compiler_4.6.0
#> [13] rprojroot_2.1.1 tools_4.6.0 pkgbuild_1.4.8 checkmate_2.3.3
#> [17] evaluate_1.0.5 yaml_2.3.10 rlang_1.1.6
12 changes: 9 additions & 3 deletions inst/tinytest/test_GOF_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ GOF_model_test_dummy <- R6::R6Class( # nolint
y_name,
Rn1_statistic, # nolint
gof_model_info_extractor,
gof_model_resample) {
gof_model_resample,
n_cores,
seed) {
}))

GOF_model_error_if_fit_class_is_not_lm_or_glm <- function() { # nolint
Expand Down Expand Up @@ -86,7 +88,9 @@ GOF_model_uses_lm_info_extractor <- function() { # nolint
y_name,
Rn1_statistic, # nolint
gof_model_info_extractor,
gof_model_resample) {
gof_model_resample,
n_cores,
seed) {
inject_lm_info_extractor <<- inherits(
x = gof_model_info_extractor,
what = "GOF_lm_info_extractor")
Expand Down Expand Up @@ -209,7 +213,9 @@ GOF_model_uses_glm_info_extractor <- function() { # nolint
y_name,
Rn1_statistic, # nolint
gof_model_info_extractor,
gof_model_resample) {
gof_model_resample,
n_cores,
seed) {
inject_glm_info_extractor <<- inherits(
x = gof_model_info_extractor,
what = "GOF_glm_info_extractor")
Expand Down
Loading