Update nutrients values - use all observations

WorldFishCenter · Dec 23, 2023 · 1c671d1 · 1c671d1
1 parent 8310151
commit 1c671d1
Show file tree

Hide file tree

Showing 79 changed files with 593 additions and 5,750 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,6 +16,7 @@ Imports:
     doParallel,
     dplyr,
     factoextra,
+    future,
     magrittr,
     rlang,
     vegan

diff --git a/R/get-tables.R b/R/get-tables.R
@@ -79,8 +79,19 @@ get_nutrients_table <- function(pars, summarise = TRUE, convert = TRUE) {
       Vitamin_A_mu = .data$VitaminA
     ) %>%
     dplyr::filter(!.data$interagency_code %in% unique(fao_groups$interagency_code)) %>%
-    dplyr::bind_rows(fao_groups)
+    dplyr::bind_rows(fao_groups) %>%
+    dplyr::filter(!.data$interagency_code == "FLY")
+
+  # use pelagic fish groups to infer FLY nutrients values
+  pelagics <-
+    nutrients_tab %>%
+    dplyr::filter(.data$interagency_code %in% c("CLP", "RAX", "SDX")) %>%
+    dplyr::summarise(dplyr::across(dplyr::where(is.numeric), ~ median(.x, na.rm = T))) %>%
+    dplyr::mutate(interagency_code = "FLY")
 
+  nutrients_tab <-
+    nutrients_tab %>%
+    dplyr::bind_rows(pelagics)
 
   if (isTRUE(convert)) {
     nutrients_tab <-
@@ -130,6 +141,44 @@ get_rfish_table <- function(pars) {
   readr::read_rds(file = rfish_rds)
 }
 
+get_fao_composition <- function() {
+  fao_comp <- readr::read_csv("https://github.com/WorldFishCenter/timor.nutrients/raw/main/inst/fao_food_composition.csv")
+
+  octopus <- c("OCT", "OCT")
+  squids <- c("SQZ", "SQR", "OMZ", "CTL", "CTC")
+  cockles <- c("CLV", "SVE")
+  shrimps <- c("CSH", "PAL", "PAN", "PRA", "PEZ", "ENS", "MPM", "MPN", "PRB", "WKP", "PBA", "GIT", "TIP", "PNV", "SHS")
+  crabs <- c("CAD", "DUN", "CRE", "PCR", "SWM", "CRB", "SCD", "MUD")
+  lobsters <- c("NEX", "LBA", "LBE", "NEP", "VLO", "LOR")
+
+  fao_comp %>%
+    dplyr::rename(interagency_code = .data$integragency_code) %>%
+    dplyr::filter(.data$food_state == "r") %>%
+    dplyr::filter(.data$interagency_code %in% c(octopus, squids, cockles, shrimps, crabs, lobsters)) %>%
+    dplyr::mutate(interagency_code = dplyr::case_when(
+      .data$interagency_code %in% octopus ~ "OCZ",
+      .data$interagency_code %in% squids ~ "IAX",
+      .data$interagency_code %in% cockles ~ "COZ",
+      .data$interagency_code %in% shrimps ~ "PEZ",
+      .data$interagency_code %in% crabs ~ "CRA",
+      .data$interagency_code %in% lobsters ~ "SLV",
+      TRUE ~ .data$interagency_code
+    )) %>%
+    # dplyr::group_by(.data$interagency_code) %>%
+    # dplyr::summarise(dplyr::across(.data$`protein(g)`:.data$`omega3(g)`, ~ median(.x, na.rm = TRUE))) %>%
+    dplyr::rename(
+      Protein_mu = .data$`protein(g)`,
+      Calcium_mu = .data$`calcium(mg)`,
+      Iron_mu = .data$`iron(mg)`,
+      Zinc_mu = .data$`zinc(mg)`,
+      Selenium_mu = .data$`selenium(mcg)`,
+      Vitamin_A_mu = .data$`vitaminA(mcg)`,
+      Omega_3_mu = .data$`omega3(g)`
+    ) %>%
+    dplyr::select(.data$interagency_code, .data$Protein_mu:.data$Omega_3_mu)
+}
+
+
 get_merged_trips <- function(pars, ...) {
   trips <-
     cloud_object_name(
@@ -164,38 +213,3 @@ get_merged_trips <- function(pars, ...) {
     )) %>%
     dplyr::select(-.data$reporting_region_fill)
 }
-
-get_fao_composition <- function() {
-  fao_comp <- readr::read_csv(system.file("fao_food_composition.csv", package = "timor.nutrients"))
-
-  octopus <- c("OCT", "OCT")
-  squids <- c("SQZ", "SQR", "OMZ", "CTL", "CTC")
-  cockles <- c("CLV", "SVE")
-  shrimps <- c("CSH", "PAL", "PAN", "PRA", "PEZ", "ENS", "MPM", "MPN", "PRB", "WKP", "PBA", "GIT", "TIP", "PNV", "SHS")
-  crabs <- c("CAD", "DUN", "CRE", "PCR", "SWM", "CRB", "SCD", "MUD")
-  lobsters <- c("NEX", "LBA", "LBE", "NEP", "VLO", "LOR")
-
-  fao_comp %>%
-    dplyr::filter(food_state == "r") %>%
-    dplyr::filter(integragency_code %in% c(octopus, squids, cockles, shrimps, crabs, lobsters)) %>%
-    dplyr::mutate(interagency_code = dplyr::case_when(
-      integragency_code %in% octopus ~ "OCZ",
-      integragency_code %in% squids ~ "IAX",
-      integragency_code %in% cockles ~ "COZ",
-      integragency_code %in% shrimps ~ "PEZ",
-      integragency_code %in% crabs ~ "CRA",
-      integragency_code %in% lobsters ~ "SLV",
-      TRUE ~ integragency_code
-    )) %>%
-    dplyr::group_by(interagency_code) %>%
-    dplyr::summarise(dplyr::across(`protein(g)`:`omega3(g)`, ~ median(.x, na.rm = TRUE))) %>%
-    dplyr::rename(
-      Protein_mu = `protein(g)`,
-      Calcium_mu = `calcium(mg)`,
-      Iron_mu = `iron(mg)`,
-      Zinc_mu = `zinc(mg)`,
-      Selenium_mu = `selenium(mcg)`,
-      Vitamin_A_mu = `vitaminA(mcg)`,
-      Omega_3_mu = `omega3(g)`
-    )
-}
diff --git a/R/google-storage.R b/R/google-storage.R
@@ -47,11 +47,9 @@ cloud_storage_authenticate <- function(provider, options) {
 #'
 #' @param file a file-path (character) to upload. A vector with multiple files
 #'   is also supported.
-#' @param provider
-#' @param options
+#' @param provider cloud provider to use, either "gcs" or "aws"
+#' @param options named list with cloud provider options, see details
 #' @param name What to call the file once uploaded. Default is the filepath
-#' @inheritParams cloud_storage_authenticate
-#' @details
 #'
 #' ### Google Cloud Services
 #'
@@ -116,14 +114,11 @@ upload_cloud_file <- function(file, provider, options, name = file) {
 #'   [add_version] when the file was uploaded to the cloud provider
 #' @param extension extension of the desired file. Use an empty string "" to
 #'   return all extensions founds
-#' @param provider
+#' @param provider cloud provider to use, either "gcs" or "aws"
 #' @param exact_match logical indicating whether the prefix should be matched
 #'   exactly
-#' @param options
-#' @inheritParams upload_cloud_file
-#'
+#' @param options named list with cloud provider options, see details
 #'
-#' @details
 #'
 #' ### Google Cloud Services
 #'
@@ -220,11 +215,10 @@ cloud_object_name <- function(prefix, version = "latest", extension = "",
 #' Download object from the cloud storage to a local file
 #'
 #' @param name the name of the object in the storage bucket.
-#' @param provider
-#' @param options
+#' @param provider cloud provider to use, either "gcs" or "aws"
+#' @param options named list with cloud provider options, see details
 #' @param file a file-path (character) where the object will be saved. Default
 #'   is the object name.
-#' @inheritParams upload_cloud_file
 #'
 #'
 #' @return the file path

diff --git a/R/processing.R b/R/processing.R
@@ -77,14 +77,15 @@ get_model_data <- function() {
     tidyr::pivot_wider(names_from = "nutrient", values_from = "people_rni_kg") %>%
     dplyr::mutate(quarter = lubridate::quarter(landing_date)) %>%
     dplyr::select(landing_date, quarter, dplyr::everything()) %>%
-    dplyr::group_by(landing_date, quarter, vessel_type, habitat, gear_type, mesh_size) %>%
+    dplyr::group_by(landing_id, landing_date, quarter, vessel_type, habitat, gear_type, mesh_size) %>%
     dplyr::summarise(dplyr::across(is.numeric, ~ median(.x, na.rm = T))) %>%
     dplyr::ungroup() %>%
     na.omit()
 
-  # factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
+  #factoextra::fviz_nbclust(df[, 8:13], kmeans, method = "wss")
+  # 4
   set.seed(555)
-  k2 <- kmeans(df[, 7:12], centers = 5, nstart = 500)
+  k2 <- kmeans(df[, 8:13], centers = 5, nstart = 500)
 
   timor_GN_raw <-
     dplyr::tibble(
@@ -102,7 +103,7 @@ get_model_data <- function() {
 
   profiles_plot_timor_GN <-
     factoextra::fviz_cluster(k2,
-      data = df[, 6:11],
+      data = df[, 8:13],
       geom = c("point"),
       shape = 19,
       alpha = 0.25,
@@ -131,22 +132,21 @@ get_model_data <- function() {
     tidyr::pivot_wider(names_from = "nutrient", values_from = "people_rni_kg") %>%
     dplyr::mutate(quarter = lubridate::quarter(landing_date)) %>%
     dplyr::select(landing_date, quarter, dplyr::everything()) %>%
-    dplyr::group_by(landing_date, quarter, vessel_type, habitat, gear_type) %>%
+    dplyr::group_by(landing_id, landing_date, quarter, vessel_type, habitat, gear_type) %>%
     dplyr::summarise(dplyr::across(is.numeric, ~ median(.x, na.rm = T))) %>%
     dplyr::ungroup() %>%
     na.omit()
 
-  # factoextra::fviz_nbclust(df[, 6:11], kmeans, method = "wss")
+  #factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
   set.seed(555)
-  k2 <- kmeans(df[, 6:11], centers = 5, nstart = 500)
+  k2 <- kmeans(df[, 7:12], centers = 5, nstart = 500)
 
   timor_AG_raw <-
     dplyr::tibble(
       clusters = as.character(k2$cluster),
       df
     )
 
-
   timor_AG <-
     dplyr::tibble(
       clusters = as.character(k2$cluster),
@@ -158,7 +158,7 @@ get_model_data <- function() {
 
   profiles_plot_timor_AG <-
     factoextra::fviz_cluster(k2,
-      data = df[, 6:11],
+      data = df[, 7:12],
       geom = c("point"),
       shape = 19,
       alpha = 0.25,
@@ -187,14 +187,14 @@ get_model_data <- function() {
     tidyr::pivot_wider(names_from = "nutrient", values_from = "people_rni_kg") %>%
     dplyr::mutate(quarter = lubridate::quarter(landing_date)) %>%
     dplyr::select(landing_date, quarter, dplyr::everything()) %>%
-    dplyr::group_by(landing_date, quarter, vessel_type, habitat, gear_type, mesh_size) %>%
+    dplyr::group_by(landing_id, landing_date, quarter, vessel_type, habitat, gear_type, mesh_size) %>%
     dplyr::summarise(dplyr::across(is.numeric, ~ median(.x, na.rm = T))) %>%
     dplyr::ungroup() %>%
     na.omit()
 
-  # factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
+  #factoextra::fviz_nbclust(df[, 8:13], kmeans, method = "wss")
   set.seed(555)
-  k2 <- kmeans(df[, 7:12], centers = 5, nstart = 500)
+  k2 <- kmeans(df[, 8:13], centers = 5, nstart = 500)
 
   atauro_GN_raw <-
     dplyr::tibble(
@@ -212,7 +212,7 @@ get_model_data <- function() {
 
   profiles_plot_atauro_GN <-
     factoextra::fviz_cluster(k2,
-      data = df[, 6:11],
+      data = df[, 8:13],
       geom = c("point"),
       shape = 19,
       alpha = 0.25,
@@ -242,14 +242,14 @@ get_model_data <- function() {
     tidyr::pivot_wider(names_from = "nutrient", values_from = "people_rni_kg") %>%
     dplyr::mutate(quarter = lubridate::quarter(landing_date)) %>%
     dplyr::select(landing_date, quarter, dplyr::everything()) %>%
-    dplyr::group_by(landing_date, quarter, vessel_type, habitat, gear_type) %>%
+    dplyr::group_by(landing_id, landing_date, quarter, vessel_type, habitat, gear_type) %>%
     dplyr::summarise(dplyr::across(is.numeric, ~ median(.x, na.rm = T))) %>%
     dplyr::ungroup() %>%
     na.omit()
 
-  # factoextra::fviz_nbclust(df[, 6:11], kmeans, method = "wss")
+  #factoextra::fviz_nbclust(df[, 7:12], kmeans, method = "wss")
   set.seed(555)
-  k2 <- kmeans(df[, 6:11], centers = 5, nstart = 500)
+  k2 <- kmeans(df[, 7:12], centers = 5, nstart = 500)
 
 
   atauro_AG_raw <-
@@ -269,7 +269,7 @@ get_model_data <- function() {
 
   profiles_plot_atauro_AG <-
     factoextra::fviz_cluster(k2,
-      data = df[, 6:11],
+      data = df[, 7:12],
       geom = c("point"),
       shape = 19,
       alpha = 0.25,
@@ -342,7 +342,8 @@ run_permanova_clusters <- function(x, permutations = NULL, parallel = NULL) {
   permanova_results <- vegan::adonis2(dist_matrix ~ clusters,
     data = anov_dat,
     parallel = parallel,
-    permutations = permutations
+    permutations = permutations,
+    save.memory = TRUE
   )
   broom::tidy(permanova_results)
 }