Skip to content

Commit

Permalink
Fix analyses temporal range - update data
Browse files Browse the repository at this point in the history
  • Loading branch information
langbart committed Jan 11, 2024
1 parent 434a57d commit c2ba5ca
Show file tree
Hide file tree
Showing 40 changed files with 62 additions and 50 deletions.
Binary file modified .DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions R/get-summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ generate_summary_table <- function(use_20 = TRUE) {

tab <-
nut_region %>%
dplyr::mutate(nutrient = ifelse(nutrient == "vitamina", "vitaminA", nutrient)) %>%
dplyr::left_join(timor.nutrients::timor_population, by = "region") %>%
dplyr::left_join(rdi_table, by = "nutrient") %>%
dplyr::rename(
Expand Down
8 changes: 4 additions & 4 deletions R/processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ get_model_data <- function() {
dplyr::ungroup() %>%
na.omit()

#factoextra::fviz_nbclust(df[, 8:13], kmeans, method = "wss")
# factoextra::fviz_nbclust(df[, 8:13], kmeans, method = "wss")
# 4
set.seed(555)
k2 <- kmeans(df[, 8:13], centers = 5, nstart = 500)
Expand Down Expand Up @@ -137,7 +137,7 @@ get_model_data <- function() {
dplyr::ungroup() %>%
na.omit()

#factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
# factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
set.seed(555)
k2 <- kmeans(df[, 7:12], centers = 5, nstart = 500)

Expand Down Expand Up @@ -192,7 +192,7 @@ get_model_data <- function() {
dplyr::ungroup() %>%
na.omit()

#factoextra::fviz_nbclust(df[, 8:13], kmeans, method = "wss")
# factoextra::fviz_nbclust(df[, 8:13], kmeans, method = "wss")
set.seed(555)
k2 <- kmeans(df[, 8:13], centers = 5, nstart = 500)

Expand Down Expand Up @@ -247,7 +247,7 @@ get_model_data <- function() {
dplyr::ungroup() %>%
na.omit()

#factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
# factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
set.seed(555)
k2 <- kmeans(df[, 7:12], centers = 5, nstart = 500)

Expand Down
12 changes: 8 additions & 4 deletions R/xgb-model.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,10 @@ run_xgmodel <- function(dataframe = NULL,
#'
#' @param model A model object that contains at least two elements: `$train_data`, a dataframe of training data,
#' and `$fit`, the fitted model object.
#'
#' @param parallel A logical value indicating whether to use parallel processing. Defaults to `NULL`, which will not
#' use parallel processing. Set to `TRUE` to enable parallel processing.
#' @param cores An integer specifying the number of cores to use for parallel processing. Only relevant if `parallel` is `TRUE`.
#' Defaults to `NULL`, which will use the default number of cores determined by the parallel backend.#'
#' @details
#' The function first removes the last column from the training data. It then selects a random subset of 500 observations
#' from this modified training dataset to serve as the background dataset for KernelSHAP. The KernelSHAP explanation
Expand All @@ -193,21 +196,22 @@ run_xgmodel <- function(dataframe = NULL,
#' }
#' @export
#'
run_kernelshap <- function(model) {
run_kernelshap <- function(model, parallel = NULL, cores = NULL) {
train_clean <- model$train_data[-ncol(model$train_data)]
bg_X <- dplyr::sample_n(train_clean, 500)

doParallel::registerDoParallel(cores = cores)

kernelshap::kernelshap(
object = model$fit,
X = train_clean,
bg_X = bg_X,
type = "prob",
feature_names = names(train_clean),
parallel = TRUE,
parallel = parallel,
verbose = TRUE
)
}

#' Plot SHAP Values for Different Model Types
#'
#' This function generates a scatter plot of SHAP (SHapley Additive exPlanations) values for different model types.
Expand Down
67 changes: 34 additions & 33 deletions data-raw/get-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,20 @@ nutrients_table <-
# dplyr::mutate(dplyr::across(c(Selenium_mu:Vitamin_A_mu), ~ .x * catch)) %>%
# rename_nutrients_mu()

region_data <- readr::read_rds(system.file("estimations_kg_12_2023.rds", package = "timor.nutrients"))
region_data <- readr::read_rds(system.file("estimations_kg_12_2023_v2.rds", package = "timor.nutrients"))
region_stats <-
region_data$municipal %>%
region_data %>%
purrr::map(~ purrr::keep(.x, stringr::str_detect(
names(.x), stringr::fixed("taxa")
))) %>%
purrr::flatten() %>%
purrr::set_names(names(region_data$municipal)) %>%
purrr::list_flatten(name_spec = "{outer}") %>%
dplyr::bind_rows(.id = "region") %>%
dplyr::rename(date_bin_start = .data$landing_period) %>%
dplyr::select(c(.data$region, .data$date_bin_start, .data$grouped_taxa, .data$catch)) %>%
dplyr::left_join(nutrients_table, by = "grouped_taxa") %>%
dplyr::mutate(dplyr::across(c(Selenium_mu:Vitamin_A_mu), ~ .x * catch)) %>%
rename_nutrients_mu() %>%
dplyr::filter(date_bin_start < "2024-01-01") %>%
rename_nutrients_mu()

# dplyr::left_join(nutrients_table, by = "grouped_taxa") %>%
Expand All @@ -81,6 +82,7 @@ kobo_trips <-
landing_id = as.character(landing_id),
n_fishers = fisher_number_man + fisher_number_woman + fisher_number_child
) %>%
dplyr::filter(landing_period >= "2018-01-01" & landing_period <= "2023-12-31") %>%
tidyr::unnest(.data$landing_catch) %>%
tidyr::unnest(.data$length_frequency) %>%
dplyr::filter(!is.na(.data$weight)) %>%
Expand Down Expand Up @@ -117,36 +119,35 @@ usethis::use_data(kobo_trips, overwrite = TRUE)
usethis::use_data(catch_data, overwrite = TRUE)
devtools::document()

#data_list <- get_model_data()
data_list <- get_model_data()

# permanova
#set.seed(555)
#data_clusters <-
# list(
# atauro_AG_perm = dplyr::slice_sample(data_list$data_raw$atauro_AG_raw, prop = .5),
# atauro_GN_perm = dplyr::slice_sample(data_list$data_raw$atauro_GN_raw, prop = .5),
# timor_AG_perm = dplyr::slice_sample(data_list$data_raw$timor_AG_raw, prop = .5),
# timor_GN_perm = dplyr::slice_sample(data_list$data_raw$timor_GN_raw, prop = .5)
# )
#perm_results <- purrr::imap(data_clusters, ~ run_permanova_clusters(.x, permutations = 999, parallel = 7))
#usethis::use_data(perm_results, overwrite = T)


# Run XGBoost model
#data_list <- get_model_data()$data_processed
#model_outputs <-
# purrr::imap(
# data_list, ~ run_xgmodel
# (dataframe = .x$dataframe, step_other = .x$step_other, n_cores = 7)
# ) %>%
# setNames(paste0("model_", names(.)))

#usethis::use_data(model_outputs, overwrite = TRUE)
#devtools::document()
#permanova
set.seed(555)
data_clusters <-
list(
atauro_AG_perm = dplyr::slice_sample(data_list$data_raw$atauro_AG_raw, prop = .5),
atauro_GN_perm = dplyr::slice_sample(data_list$data_raw$atauro_GN_raw, prop = .5),
timor_GN_perm = dplyr::slice_sample(data_list$data_raw$timor_GN_raw, prop = .5),
timor_AG_perm = dplyr::slice_sample(data_list$data_raw$timor_AG_raw, prop = .5)
)
perm_results <- purrr::imap(data_clusters, ~ run_permanova_clusters(.x, permutations = 999, parallel = 8))
usethis::use_data(perm_results, overwrite = T)
devtools::document()

#Run XGBoost model
data_list <- get_model_data()$data_processed
model_outputs <-
purrr::imap(
data_list, ~ run_xgmodel
(dataframe = .x$dataframe, step_other = .x$step_other, n_cores = 8)
) %>%
setNames(paste0("model_", names(.)))

usethis::use_data(model_outputs, overwrite = TRUE)
devtools::document()

# Get shap values
#shap_results <- purrr::map(timor.nutrients::model_outputs,run_kernelshap)
#usethis::use_data(shap_results, overwrite = T)
#devtools::document()

#Get shap values
shap_results <- purrr::map(timor.nutrients::model_outputs, run_kernelshap, parallel = TRUE, cores = 8)
usethis::use_data(shap_results, overwrite = T)
devtools::document()
Binary file modified data/catch_data.rda
Binary file not shown.
Binary file modified data/kobo_trips.rda
Binary file not shown.
Binary file modified data/model_outputs.rda
Binary file not shown.
Binary file modified data/perm_results.rda
Binary file not shown.
Binary file modified data/region_stats.rda
Binary file not shown.
Binary file modified data/shap_results.rda
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions docs/highlight.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ <h1>
<div id="header">
<h1 class="title">Modelling scenarios for nutrient-sensitive fisheries management</h1>
<p class="author"><em>Lorenzo Longobardi</em></p>
<p class="date"><em>Last update: 2024-01-08</em></p>
<p class="date"><em>Last update: 2024-01-11</em></p>
</div>
<div id="content" class="section level1 hasAnchor" number="1">
<h1><span class="header-section-number">1</span> Content<a href="index.html#content" class="anchor-section" aria-label="Anchor link to header"></a></h1>
Expand Down
Loading

0 comments on commit c2ba5ca

Please sign in to comment.