diff --git a/R/map_formula.R b/R/map_formula.R index c5a8fc7d..405ad0cd 100644 --- a/R/map_formula.R +++ b/R/map_formula.R @@ -121,8 +121,22 @@ modify_call <- function(the_call, args) { # This function finds special terms in the formula. We consider special any # term that is a custom smooth_function implemented in this package find_special_terms <- function(the_labels){ - sapply(the_labels, grepl, pattern = "smooth_lag(", fixed = TRUE) | - sapply(the_labels, grepl, pattern = "smooth_time(", fixed = TRUE) | - sapply(the_labels, grepl, pattern = "smooth_space(", fixed = TRUE) | - sapply(the_labels, grepl, pattern = "smooth_space_time(", fixed = TRUE) + + lag_detect <- simple_str_detect(the_labels, "smooth_lag(") + time_detect <- simple_str_detect(the_labels, "smooth_time(") + space_detect <- simple_str_detect(the_labels, "smooth_space(") + space_time_detect <- simple_str_detect(the_labels, "smooth_space_time(") + + # if((any(time_detect) | any(space_detect)) & any(space_time_detect)) { + # cli::cli_alert_warning(paste0("smooth_space_time should not be combined ", + # "with calls to smooth_space or smooth_time")) + # } + + return(lag_detect | time_detect | space_detect | space_time_detect) +} + +# Simple implementation of string detection on a vector for our purposes +simple_str_detect <- function(labs, pat){ + sapply(labs, grepl, + pattern = pat, fixed = TRUE) } diff --git a/R/smooths-types.R b/R/smooths-types.R new file mode 100644 index 00000000..dfaea59d --- /dev/null +++ b/R/smooths-types.R @@ -0,0 +1,287 @@ +# ICAR -------------------------------------------------------------------- + +# Construct an ICAR penalization matrix for a given "dimension" and returns the +# double list args_and_vars that have the args to build a new call to s() and the +# vars necessary for the evaluation of that s() smooth +ICAR <- function(data_frame, boundaries, time, dimension, + k, bs, xt, is_spm, unused_names = c("var", "m"), ...) { + + checkmate::assert_class(data_frame, "sf") + checkmate::assert_class(boundaries, "sspm_discrete_boundary") + checkmate::assert_character(time) + checkmate::assert_character(dimension) + checkmate::assert_choice(dimension, choices = c("time", "space", "space_time")) + + # Recapture the ellipsis again + args_list <- as.list(match.call(expand.dots = FALSE)$`...`) + args_list <- args_list[!(names(args_list) %in% unused_names)] + + # ---- TIME ---- + time_levels <- levels(data_frame[[time]]) + + # ---- SPACE ---- + # Here we assume the hardcoded convention that the patch column is patch_id + # (from the discretization) + space <- "patch_id" + patches <- spm_patches(boundaries) + + # Setup done ---- + vars <- list() + + # NOTE: Some code duplication is a little bit of a shame but is necessary as + # previous attempts at functionalizing this part of the code made + # it seem more complicated and less easy to understand. + if (dimension == "time") { + + out <- list(str2lang(time)) + + ret <- ICAR_s_time(time_levels, bs, xt, vars, k, is_spm) + k <- ret$k + bs <- ret$bs + xt <- ret$xt + vars <- ret$vars + xt_list <- ret$xt_list + + } else if (dimension == "space") { + + out <- list(str2lang(space)) + + ret <- ICAR_s_space(patches, space, bs, xt, vars, k, is_spm) + k <- ret$k + bs <- ret$bs + xt <- ret$xt + vars <- ret$vars + xt_list <- ret$xt_list + + } else if (dimension == "space_time") { + + out <- list(str2lang(time), str2lang(space)) + + ret <- ICAR_s_space_time(patches, space, time_levels, bs, xt, vars, k, is_spm) + k <- ret$k + bs <- ret$bs + xt <- ret$xt + vars <- ret$vars + xt_list <- ret$xt_list + names(xt_list$xt) <- c(time, space) + + } + + return(list(args = do.call(c, + args = list(out, + list(k = k, bs = bs), + xt_list, + args_list)), + vars = vars)) +} + +ICAR_s_time <- function(time_levels, bs, xt, vars, k, is_spm){ + + n_time_levels <- as.numeric(length(time_levels)) + + if (is.null(k)) { + if (!is_spm) { + k <- n_time_levels + } + } + + if (is.null(bs)) { + + # If no bs specified, go with re + bs <- "re" + + } + + if (bs == "re"){ + + if (is.null(xt)) { + + xt_list <- NULL + + } else { + + if(is.na(xt)){ + xt <- list() + } + + checkmate::assert_list(xt) + + if (is.null(xt$penalty)) { + pen_mat_time <- ICAR_time(time_levels) + } else { + checkmate::assert_matrix(xt$penalty) + pen_mat_time <- xt + } + + pen_expression <- rlang::expr(pen_mat_time) + vars$pen_mat_time <- pen_mat_time + xt_list <- list(xt = list(penalty = pen_expression)) + + } + + } else if (bs == "mrf"){ + + if (is.null(xt)) { + + pen_mat_time <- ICAR_time(time_levels) + + } else { + + checkmate::assert_list(xt) + + if (is.null(xt$penalty)) { + pen_mat_time <- ICAR_time(time_levels) + } else { + checkmate::assert_matrix(xt$penalty) + pen_mat_time <- xt + } + + } + + # Create symbol and assign to list + pen_expression <- rlang::expr(pen_mat_time) + vars$pen_mat_time <- pen_mat_time + xt_list <- list(xt = list(penalty = pen_expression)) + + } + + return(list(k = k, bs = bs, xt = xt, vars = vars, xt_list = xt_list)) +} + +ICAR_s_space <- function(patches, space, bs, xt, vars, k, is_spm){ + + if (is.null(k)) { + if (!is_spm) { + k <- 30 + } + } + + if (is.null(bs)) { + bs <- "mrf" + } + + if (is.null(xt)) { + + pen_mat_space <- ICAR_space(patches, space) + + } else { + + checkmate::assert_list(xt) + + if (is.null(xt$penalty)) { + pen_mat_space <- ICAR_space(patches, space) + } else { + checkmate::assert_matrix(xt$penalty) + pen_mat_space <- xt + } + + } + + # Create symbol and assign to list + pen_expression <- rlang::expr(pen_mat_space) + vars$pen_mat_space <- pen_mat_space + xt_list <- list(xt = list(penalty = pen_expression)) + + return(list(k = k, bs = bs, xt = xt, vars = vars, xt_list = xt_list)) +} + +ICAR_s_space_time <- function(patches, space, time_levels, bs, xt, vars, k, is_spm){ + + if (length(xt) == 0){ + xt <- list(NULL, NULL) + } + + checkmate::assert_true(length(bs) == 2) + checkmate::assert_true(length(xt) == 2) + checkmate::assert_true(length(k) == 2) + + ret_time <- ICAR_s_time(time_levels, bs[1], xt[[1]], vars, k[1], is_spm) + ret_space <- ICAR_s_space(patches, space, bs[2], xt[[2]], vars, k[2], is_spm) + + bs <- c(ret_time$bs, ret_space$bs) + vars <- c(ret_time$vars, ret_space$vars) + k <- c(ret_time$k, ret_space$k) + xt <- list(ret_time$xt, ret_space$xt) + + xt_list <- list(xt = list(ret_time$xt_list, ret_space$xt_list)) + + if(!is.null(xt_list$xt[[1]])){ + xt_list$xt[[1]] <- list(penalty = rlang::expr(pen_mat_time)) + } + + if(!is.null(xt_list$xt[[2]])){ + xt_list$xt[[2]] <- list(penalty = rlang::expr(pen_mat_space)) + } + + return(list(k = k, bs = bs, xt = xt, vars = vars, xt_list = xt_list)) + +} + +ICAR_time <- function(time_levels) { + + # Creating an auto-regressive year penalty; this matrix means that the + # estimate for each year is penalized to be close to the years before and + # after it + + time_levels <- sort(time_levels) + # time_levels <- 1979:2018 + + n_time_levels <- length(unique(time_levels)) + + pen_mat = matrix(0, nrow = n_time_levels, ncol = n_time_levels) + dimnames(pen_mat) = list(time_levels, time_levels) + diag(pen_mat[-1, -n_time_levels]) = diag(pen_mat[-n_time_levels, -1]) = -1 + diag(pen_mat) = -(colSums(pen_mat) - diag(pen_mat)) + + return(pen_mat) + +} + +ICAR_space <- function(patches, space) { + + checkmate::assert_choice(space, names(patches)) + + patches_adj_mat = suppressAll(sf::st_intersects(patches, sparse = FALSE)) + dimnames(patches_adj_mat) = list(unique(patches[[space]]), + unique(patches[[space]])) + patches_adj_mat = patches_adj_mat + 0 + diag(patches_adj_mat) = 0 + pen_mat = diag(rowSums(patches_adj_mat)) - patches_adj_mat + + return(pen_mat) + +} + +# LINPRED ----------------------------------------------------------------- + +# Construct the lag matrix and associated lag columns for the linear predictor +# method of fitting the smooth + +LINPRED <- function(data_frame, boundaries, time, var, k, m, + unused_names = c("dimension", "bs", "xt", "is_spm"), ...) { + + checkmate::assert_class(data_frame, "sf") + checkmate::assert_class(boundaries, "sspm_discrete_boundary") + checkmate::assert_character(time) + + # Recapture the ellipsis again + args_list <- as.list(match.call(expand.dots = FALSE)$`...`) + args_list <- args_list[!(names(args_list) %in% unused_names)] + + # Make the lag and by matrices + lag_matrix <- make_lag_matrix(data_frame, k, boundaries, time) + by_matrix <- make_by_matrix(data_frame, k, boundaries, time, var) + + out <- list(str2lang("lag_matrix")) + vars <- list() + vars$lag_matrix <- lag_matrix + vars$by_matrix <- by_matrix + + return(list(args = do.call(c, + args = list(out, + list(k = k, m = m, + by = str2lang("by_matrix")), + args_list)), + vars = vars)) + +} diff --git a/R/smooths.R b/R/smooths.R index 1a9185ea..d6abc116 100644 --- a/R/smooths.R +++ b/R/smooths.R @@ -27,7 +27,7 @@ setGeneric(name = "smooth_time", type = "ICAR", k = NULL, bs = "re", - xt = NULL, + xt = NA, is_spm = FALSE, ...) { standardGeneric("smooth_time") @@ -57,9 +57,9 @@ setGeneric(name = "smooth_space_time", boundaries, time, type = "ICAR", - k = NULL, + k = c(NA, 30), bs = c("re", "mrf"), - xt = NULL, + xt = list(NA, NULL), is_spm = FALSE, ...) { standardGeneric("smooth_space_time") @@ -131,10 +131,10 @@ setMethod(f = "smooth_space_time", args_list <- as.list(match.call(expand.dots = FALSE)$`...`) do.call(smooth_routine, - append(list(dimension = "space_time", var = NULL, data_frame = data_frame, - boundaries = boundaries, time = time, - type = type, k = k, m = NULL, bs = bs, xt = xt, - is_spm = is_spm, smooth_type = "ti"), + append(list(dimension = "space_time", var = NULL, + data_frame = data_frame, boundaries = boundaries, + time = time, type = type, k = k, m = NULL, bs = bs, + xt = xt, is_spm = is_spm, smooth_type = "ti"), args_list)) } @@ -159,7 +159,6 @@ setMethod(f = "smooth_lag", } ) - # Routine ----------------------------------------------------------------- smooth_routine <- function(dimension, var, data_frame, boundaries, time, @@ -195,308 +194,3 @@ smooth_routine <- function(dimension, var, data_frame, boundaries, time, return(ret_list) } - -# ICAR -------------------------------------------------------------------- - -# Construct an ICAR penalization matrix for a given "dimension" and returns the -# double list args_and_vars that have the args to build a new call to s() and the -# vars necessary for the evaluation of that s() smooth -ICAR <- function(data_frame, boundaries, time, dimension, - k, bs, xt, is_spm, unused_names = c("var", "m"), ...) { - - checkmate::assert_class(data_frame, "sf") - checkmate::assert_class(boundaries, "sspm_discrete_boundary") - checkmate::assert_character(time) - checkmate::assert_character(dimension) - checkmate::assert_choice(dimension, choices = c("time", "space", "space_time")) - - # Recapture the ellipsis again - args_list <- as.list(match.call(expand.dots = FALSE)$`...`) - args_list <- args_list[!(names(args_list) %in% unused_names)] - - # ---- TIME ---- - time_levels <- levels(data_frame[[time]]) - n_time_levels = as.numeric(length(time_levels)) - - # ---- SPACE ---- - # Here we assume the hardcoded convention that the patch column is patch_id - # (from the discretization) - space <- "patch_id" - patches <- boundaries@patches - - # Setup done ---- - vars <- list() - - # NOTE: Some code duplication is a little bit of a shame but is necessary as - # previous attempst at functionnalizing this part of the code made - # it seem more complicated and less easy to understand. - if (dimension == "time") { - - out <- list(str2lang(time)) - - if (is.null(k)) { - if (!is_spm) { - k <- n_time_levels - } - } - - if (is.null(bs)) { - - # If no bs specified, go with re - bs <- "re" - - } - - if (bs == "re"){ - - if (is.null(xt)) { - - xt_list <- NULL - - } else { - - checkmate::assert_list(xt) - - if (is.null(xt$penalty)) { - pen_mat_time <- ICAR_time(time_levels) - } else { - checkmate::assert_matrix(xt$penalty) - pen_mat_time <- xt - } - - pen_expression <- rlang::expr(pen_mat_time) - vars$pen_mat_time <- pen_mat_time - xt_list <- list(xt = list(penalty = pen_expression)) - - } - - } else if (bs == "mrf"){ - - if (is.null(xt)) { - - pen_mat_time <- ICAR_time(time_levels) - - } else { - - checkmate::assert_list(xt) - - if (is.null(xt$penalty)) { - pen_mat_time <- ICAR_time(time_levels) - } else { - checkmate::assert_matrix(xt$penalty) - pen_mat_time <- xt - } - - } - - # Create symbol and assign to list - pen_expression <- rlang::expr(pen_mat_time) - vars$pen_mat_time <- pen_mat_time - xt_list <- list(xt = list(penalty = pen_expression)) - - } - - } else if (dimension == "space") { - - out <- list(str2lang(space)) - - if (is.null(k)) { - if (!is_spm) { - k <- 30 - } - } - - if (is.null(bs)) { - bs <- "mrf" - } - - if (is.null(xt)) { - - pen_mat_space <- ICAR_space(patches, space) - - } else { - - checkmate::assert_list(xt) - - if (is.null(xt$penalty)) { - pen_mat_space <- ICAR_space(patches, space) - } else { - checkmate::assert_matrix(xt$penalty) - pen_mat_space <- xt - } - - } - - # Create symbol and assign to list - pen_expression <- rlang::expr(pen_mat_space) - vars$pen_mat_space <- pen_mat_space - xt_list <- list(xt = list(penalty = pen_expression)) - - } else if (dimension == "space_time") { - - out <- list(str2lang(time), str2lang(space)) - - if (is.null(k)) { - if (is_spm) { - k <- c(NA, 30) - } else { - k <- c(n_time_levels, 30) - } - } - - if (is.null(bs)) { - - bs <- c("re", "mrf") - - xt_list <- NULL - - } else if (identical(bs, c("mrf", "mrf")) | - identical(bs, c(NULL, "mrf")) | - identical(bs, c("re", "mrf"))){ # If mrf specified, provide the matrix - - if (identical(bs, c(NULL, "mrf"))) bs <- c("re", "mrf") - - if (is.null(xt)) { - - pen_mat_time <- ICAR_time(time_levels) - pen_mat_space <- ICAR_space(patches, space) - - vars$pen_mat_time <- pen_mat_time - vars$pen_mat_space <- pen_mat_space - - } else { - - # Must be a list of list with correct names - checkmate::assert_list(xt) - lapply(xt, checkmate::assert_list) - checkmate::assert_names(names(xt), - subset.of = c(time, space)) - - if (is.null(xt[[time]]$penalty)) { - vars$pen_mat_time <- ICAR_time(time_levels) - } else { - checkmate::assert_matrix(xt[[time]]$penalty) - vars$pen_mat_time <- xt[[time]]$penalty - } - - if (is.null(xt[[space]]$penalty)) { - vars$pen_mat_space <- ICAR_space(patches, space) - } else { - checkmate::assert_matrix(xt[[space]]$penalty) - vars$pen_mat_space <- xt[[space]]$penalty - } - - } - - xt_list <- list(xt = list(list(penalty = rlang::expr(pen_mat_time)), - list(penalty = rlang::expr(pen_mat_space)))) - names(xt_list$xt) <- c(time, space) - - } - - # if (is.null(xt)) { - # - # pen_mat_space <- ICAR_space(patches, space) - # - # vars$pen_mat_space <- pen_mat_space - # - # } else { - # - # # Must be a list of list with correct names - # checkmate::assert_list(xt) - # lapply(xt, checkmate::assert_list) - # checkmate::assert_names(names(xt), - # subset.of = c(time, space)) - # - # if (is.null(xt[[space]]$penalty)) { - # vars$pen_mat_space <- ICAR_space(patches, space) - # } else { - # checkmate::assert_matrix(xt[[space]]$penalty) - # vars$pen_mat_space <- xt[[space]]$penalty - # } - # - # } - - # Create symbol and assign to list - # pen_expression <- rlang::expr(pen_mat_space) - # vars$pen_mat_space <- pen_mat_space - # xt_list <- list(xt = list(penalty = pen_expression)) - - } - - return(list(args = do.call(c, - args = list(out, - list(k = k, bs = bs), - xt_list, - args_list)), - vars = vars)) -} - -ICAR_time <- function(time_levels) { - - # Creating an auto-regressive year penalty; this matrix means that the - # estimate for each year is penalized to be close to the years before and - # after it - - time_levels <- sort(time_levels) - # time_levels <- 1979:2018 - - n_time_levels <- length(unique(time_levels)) - - pen_mat = matrix(0, nrow = n_time_levels, ncol = n_time_levels) - dimnames(pen_mat) = list(time_levels, time_levels) - diag(pen_mat[-1, -n_time_levels]) = diag(pen_mat[-n_time_levels, -1]) = -1 - diag(pen_mat) = -(colSums(pen_mat) - diag(pen_mat)) - - return(pen_mat) - -} - -ICAR_space <- function(patches, space) { - - checkmate::assert_choice(space, names(patches)) - - patches_adj_mat = suppressAll(sf::st_intersects(patches, sparse = FALSE)) - dimnames(patches_adj_mat) = list(unique(patches[[space]]), - unique(patches[[space]])) - patches_adj_mat = patches_adj_mat + 0 - diag(patches_adj_mat) = 0 - pen_mat = diag(rowSums(patches_adj_mat)) - patches_adj_mat - - return(pen_mat) - -} - -# LINPRED ----------------------------------------------------------------- - -# Construct the lag matrix and associated lag columns for the linear predictor -# method of fitting the smooth - -LINPRED <- function(data_frame, boundaries, time, var, k, m, - unused_names = c("dimension", "bs", "xt", "is_spm"), ...) { - - checkmate::assert_class(data_frame, "sf") - checkmate::assert_class(boundaries, "sspm_discrete_boundary") - checkmate::assert_character(time) - - # Recapture the ellipsis again - args_list <- as.list(match.call(expand.dots = FALSE)$`...`) - args_list <- args_list[!(names(args_list) %in% unused_names)] - - # Make the lag and by matrices - lag_matrix <- make_lag_matrix(data_frame, k, boundaries, time) - by_matrix <- make_by_matrix(data_frame, k, boundaries, time, var) - - out <- list(str2lang("lag_matrix")) - vars <- list() - vars$lag_matrix <- lag_matrix - vars$by_matrix <- by_matrix - - return(list(args = do.call(c, - args = list(out, - list(k = k, m = m, - by = str2lang("by_matrix")), - args_list)), - vars = vars)) - -} diff --git a/README.Rmd b/README.Rmd index 3e737397..4aab34ae 100644 --- a/README.Rmd +++ b/README.Rmd @@ -133,16 +133,14 @@ spm_points(bounds_voronoi) ```{r} biomass_smooth <- biomass_dataset %>% - spm_smooth(weight_per_km2 ~ sfa + smooth_time(by=sfa, xt = list()) + + spm_smooth(weight_per_km2 ~ sfa + smooth_time(by = sfa) + smooth_space() + - smooth_space_time(bs = c(NULL, "mrf"), - k = c(NA, 30)), + smooth_space_time(), boundaries = bounds_voronoi, - family=tw) %>% - spm_smooth(temp_at_bottom ~ smooth_time(by=sfa, xt = list()) + + family=tw)%>% + spm_smooth(temp_at_bottom ~ smooth_time(by=sfa, xt = NULL) + smooth_space() + - smooth_space_time(bs = c(NULL, "mrf"), - k = c(NA, 30)), + smooth_space_time(xt = NULL), family=gaussian) biomass_smooth diff --git a/README.md b/README.md index 7f3e16f3..e64ee8c4 100644 --- a/README.md +++ b/README.md @@ -242,16 +242,18 @@ spm_points(bounds_voronoi) ``` r biomass_smooth <- biomass_dataset %>% - spm_smooth(weight_per_km2 ~ sfa + smooth_time(by=sfa) + smooth_space() + - smooth_space_time(k = c(NA, 30)), + spm_smooth(weight_per_km2 ~ sfa + smooth_time(by = sfa) + + smooth_space() + + smooth_space_time(), boundaries = bounds_voronoi, - family=tw) %>% - spm_smooth(temp_at_bottom ~ smooth_time(by=sfa) + smooth_space() + - smooth_space_time(k = c(NA, 30)), + family=tw)%>% + spm_smooth(temp_at_bottom ~ smooth_time(by=sfa, xt = NULL) + + smooth_space() + + smooth_space_time(xt = NULL), family=gaussian) -#> ℹ Fitting formula: weight_per_km2 ~ sfa + smooth_time(by = sfa) + smooth_space() + smooth_space_time(k = c(NA, 30)) for dataset 'borealis' +#> ℹ Fitting formula: weight_per_km2 ~ sfa + smooth_time(by = sfa) + smooth_space() + smooth_space_time() for dataset 'borealis' #> ℹ Note: response variable temp_at_bottom is NOT a biomass density variable -#> ℹ Fitting formula: temp_at_bottom ~ smooth_time(by = sfa) + smooth_space() + smooth_space_time(k = c(NA, 30)) for dataset 'borealis' +#> ℹ Fitting formula: temp_at_bottom ~ smooth_time(by = sfa, xt = NULL) + smooth_space() + smooth_space_time(xt = NULL) for dataset 'borealis' biomass_smooth #> @@ -267,7 +269,7 @@ biomass_smooth vars” above) can be easily plotted: ``` r -plot(biomass_smooth, var = "weight_per_km2", log = FALSE, aggregate = T) +plot(biomass_smooth, var = "weight_per_km2", log = FALSE) ``` You @@ -283,10 +285,10 @@ plot(biomass_smooth, var = "weight_per_km2", use_sf = TRUE) ``` r predator_smooth <- predator_dataset %>% - spm_smooth(weight_per_km2 ~ smooth_time(k = 3) + smooth_space(), + spm_smooth(weight_per_km2 ~ smooth_time() + smooth_space(), boundaries = bounds_voronoi, drop.unused.levels = F, family=tw, method= "fREML") -#> ℹ Fitting formula: weight_per_km2 ~ smooth_time(k = 3) + smooth_space() for dataset 'all_predators' +#> ℹ Fitting formula: weight_per_km2 ~ smooth_time() + smooth_space() for dataset 'all_predators' predator_smooth #> diff --git a/man/smooths.Rd b/man/smooths.Rd index 36619f9f..37fdbf06 100644 --- a/man/smooths.Rd +++ b/man/smooths.Rd @@ -18,7 +18,7 @@ smooth_time( type = "ICAR", k = NULL, bs = "re", - xt = NULL, + xt = NA, is_spm = FALSE, ... ) @@ -40,9 +40,9 @@ smooth_space_time( boundaries, time, type = "ICAR", - k = NULL, + k = c(NA, 30), bs = c("re", "mrf"), - xt = NULL, + xt = list(NA, NULL), is_spm = FALSE, ... ) @@ -65,7 +65,7 @@ smooth_lag( type = "ICAR", k = NULL, bs = "re", - xt = NULL, + xt = NA, is_spm = FALSE, ... ) @@ -87,9 +87,9 @@ smooth_lag( boundaries, time, type = "ICAR", - k = NULL, + k = c(NA, 30), bs = c("re", "mrf"), - xt = NULL, + xt = list(NA, NULL), is_spm = FALSE, ... )