Merge pull request #106 from trias-project/Replace-data-in-function

Improve indicator_native_range_year()
trias-project · Mar 29, 2023 · 639b224 · 639b224
2 parents 94ec160 + 7221ea2
commit 639b224
Show file tree

Hide file tree

Showing 6 changed files with 252 additions and 32 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: trias
 Title: Process Data for the Project Tracking Invasive Alien Species
     (TrIAS)
-Version: 2.0.5
+Version: 2.0.6
 Authors@R: c(
     person("Damiano", "Oldoni", email = "[email protected]",
            role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3445-7562")),

diff --git a/R/indicator_native_range_year.R b/R/indicator_native_range_year.R
@@ -4,17 +4,21 @@
 #' Based on
 #' [countYearProvince](https://github.com/inbo/reporting-rshiny-grofwildjacht/blob/exoten/reporting-grofwild/R/countYearProvince.R)
 #' plot from reporting - rshiny - grofwildjacht
-#' @param data input data.frame.
+#' @param df input data.frame.
 #' @param years (numeric) vector years we are interested to. If \code{NULL}
 #'   (default) all years from minimum and maximum of years of first observation
 #'   are taken into account.
 #' @param type character, native_range level of interest should be one of
-#'   \code{c("native_continent", "native_range")}.
+#'   `c("native_range", "native_continent")`. Default: `"native_range"`. A
+#'   column called as the selected `type` must be present in `df`.
 #' @param x_lab character string, label of the x-axis. Default: "year".
 #' @param y_lab character string, label of the y-axis. Default: "number of alien
 #'   species".
-#' @param relative (logical) if TRUE, each bar is standardised before stacking
-#' @param first_observed (character) Name of the column in \code{data} containing temporal information about introduction of the alien species. Expressed as years.
+#' @param relative (logical) if TRUE (default), each bar is standardised before
+#'   stacking.
+#' @param first_observed (character) Name of the column in `data`
+#'   containing temporal information about introduction of the alien species.
+#'   Expressed as years.
 #' @return list with: \itemize{ \item{'static_plot': }{ggplot object, for a
 #'   given species the observed number per year and per native range is plotted
 #'   in a stacked bar chart} \item{'interactive_plot': }{plotly object, for a
@@ -28,25 +32,81 @@
 #'   introduced from the native range for a given year. (n/total)*100} } } }
 #' @export
 #' @importFrom dplyr %>% .data
-
-indicator_native_range_year <- function(data, years = NULL,
-                                        type = c("native_continent", "native_range"),
-                                        x_lab = "year",
-                                        y_lab = "alien species",
-                                        relative = FALSE,
-                                        first_observed = "first_observed") {
+#' @examples
+#' \dontrun{
+#' library(readr)
+#' datafile <- paste0(
+#'   "https://raw.githubusercontent.com/trias-project/indicators/master/data/",
+#'   "interim/data_input_checklist_indicators.tsv"
+#' )
+#' data <- read_tsv(datafile,
+#'   na = "",
+#'   col_types = cols(
+#'     .default = col_character(),
+#'     key = col_double(),
+#'     nubKey = col_double(),
+#'     speciesKey = col_double(),
+#'     first_observed = col_double(),
+#'     last_observed = col_double()
+#'   )
+#' )
+#' indicator_native_range_year(data, "native_continent", years = c(2010,2013))
+#' }
+indicator_native_range_year <- function(
+    df,
+    years = NULL,
+    type = c("native_range", "native_continent"),
+    x_lab = "year",
+    y_lab = "alien species",
+    relative = FALSE,
+    first_observed = "first_observed") {
+  # initial input checks
+  assertthat::assert_that(is.data.frame(df))
+  if (!is.null(years)) {
+    assertthat::assert_that(all(is.numeric(years)),
+                            msg = "Argument years has to be a number."
+    )
+    assertthat::assert_that(
+      all(years < as.integer(format(Sys.Date(), "%Y"))),
+      msg = sprintf(
+        "All values in years has to be less than %s.", format(Sys.Date(), "%Y")
+      )
+    )
+  }
   type <- match.arg(type)
-
+  assertthat::assert_that(type %in% names(df),
+                          msg = sprintf("Column %s not present in df.", type)
+  )
+  if (!is.null(x_lab)) {
+    assertthat::assert_that(is.character(x_lab),
+                            msg = "Argument x_lab has to be a character or NULL."
+    )
+  }
+  if (!is.null(y_lab)) {
+    assertthat::assert_that(is.character(y_lab),
+                            msg = "Argument y_lab has to be a character or NULL."
+    )
+
+  }
+  assertthat::assert_that(is.logical(relative),
+                          msg = "Argument relative has to be a logical."
+  )
+  assertthat::assert_that(is.character(first_observed),
+                          msg = "Argument first_observed has to be a character."
+  )
+  assertable::assert_colnames(df, first_observed, only_colnames = FALSE)
+
+
   # Rename to default column name
-  data <-
-    data %>%
+  df <-
+    df %>%
     dplyr::rename_at(dplyr::vars(first_observed), ~"first_observed")
 
   if (is.null(years)) {
-    years <- sort(unique(data$first_observed))
+    years <- sort(unique(df$first_observed))
   }
 
-  plotData <- data
+  plotData <- df
 
   plotData$location <- switch(type,
     native_range = plotData$native_range,
@@ -56,17 +116,13 @@ indicator_native_range_year <- function(data, years = NULL,
   # Select data
   plotData <- plotData[plotData$first_observed %in% years, c("first_observed", "location")]
   plotData <- plotData[!is.na(plotData$first_observed) & !is.na(plotData$location), ]
-
-  # Exclude unused provinces
+
+  # Set location and first_observed to factors
+  plotData$first_observed <- as.factor(plotData$first_observed)
   plotData$location <- as.factor(plotData$location)
   plotData$location <- droplevels(plotData$location)
 
   # Summarize data per native_range and year
-  plotData$first_observed <- with(plotData, factor(first_observed,
-    levels =
-      min(years):max(years)
-  ))
-
   summaryData <- reshape2::melt(table(plotData), id.vars = "first_observed")
   summaryData <- summaryData %>%
     dplyr::group_by(.data$first_observed) %>%

diff --git a/man/indicator_native_range_year.Rd b/man/indicator_native_range_year.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -1,4 +1,5 @@
 library(testthat)
+library(purrr)
 library(trias)
 
 test_check("trias")
diff --git a/tests/testthat/test-gbif_verify_keys.R b/tests/testthat/test-gbif_verify_keys.R
@@ -1,5 +1,3 @@
-#' @importFrom purrr map_chr
-#' 
 context("test_gbif_verify_keys")
 
 test_that("test several input types", {

diff --git a/tests/testthat/test-indicator_native_range_year.R b/tests/testthat/test-indicator_native_range_year.R
@@ -0,0 +1,140 @@
+context("test_indicator_native_range_year")
+
+# test df
+input_test_df <- read.delim(
+  test_path("data_test_input_graphs_indicators/test_visualization_df.tsv"),
+  sep = "\t"
+)
+
+nrow_no_first_obs <-
+  nrow(input_test_df[is.na(input_test_df$first_observed), ])
+
+cleaned_input_test_df <- input_test_df[!is.na(input_test_df$first_observed), ]
+
+test_that("Arg: df", {
+  expect_error(
+    indicator_native_range_year(3),
+    "df is not a data frame"
+  )
+})
+
+
+test_that("Arg: type", {
+  expect_error(
+    indicator_native_range_year(cleaned_input_test_df, 
+                                type = 2, 
+                                years = 2001),
+    "'arg' must be NULL or a character vector"
+  )
+  a <- cleaned_input_test_df
+  colnames(a)[colnames(a) == "native_range"] <- "l"
+  expect_error(
+    indicator_native_range_year(a, 
+                                type = "native_range", 
+                                years = 2001),
+    "Column native_range not present in df."
+  )
+})
+
+
+test_that("Arg: years", {
+  expect_error(
+    indicator_native_range_year(cleaned_input_test_df,
+      years = "2000"
+    ),
+    "Argument years has to be a number."
+  )
+  expect_error(
+    indicator_native_range_year(cleaned_input_test_df,
+                                years = c(2000,3000)
+    ),
+    sprintf("All values in years has to be less than %s.", format(Sys.Date(), "%Y"))
+  )
+})
+
+test_that("Arg: first_observed", {
+  expect_error(
+    indicator_native_range_year(input_test_df, 
+                                years = 2010, 
+                                first_observed = 3),
+    "Argument first_observed has to be a character."
+  )
+  expect_error(
+    indicator_native_range_year(input_test_df,
+                                year = 2010,
+                                first_observed = "bad_colname"
+    )
+  )
+})
+
+test_that("Param: labels", {
+  expect_error(
+    indicator_native_range_year(input_test_df, x_lab = input_test_df),
+    "Argument x_lab has to be a character or NULL."
+  )
+  expect_error(
+    indicator_native_range_year(input_test_df, y_lab = 4),
+    "Argument y_lab has to be a character or NULL."
+  )
+})
+
+test_that("Test output type, class, slots and columns", {
+  plot_output <-
+    indicator_native_range_year(cleaned_input_test_df,
+                                years = c(2000,2005)
+  )
+  plot_output_rel <-
+    indicator_native_range_year(cleaned_input_test_df,
+                                years = c(2000,2005),
+                                relative = TRUE
+  )
+  # output is a list
+  expect_type(plot_output, type = "list")
+  expect_type(plot_output_rel, type = "list")
+
+  # output has the right three slots
+  slots <- c("static_plot", "interactive_plot", "data")
+  expect_equal(names(plot_output), slots)
+  expect_equal(names(plot_output_rel), slots)
+
+  # static plot slot is a list with gg as class
+  expect_type(plot_output$static_plot, type = "list")
+  expect_type(plot_output_rel$static_plot, type = "list")
+  expect_s3_class(plot_output$static_plot, class = c("gg", "ggplot"))
+  expect_s3_class(plot_output_rel$static_plot, class = c("gg", "ggplot"))
+
+  # interactive plot slot is a list with plotly and htmlwidget as class
+  expect_type(plot_output$interactive_plot, type = "list")
+  expect_type(plot_output_rel$interactive_plot, type = "list")
+  expect_s3_class(plot_output$interactive_plot, class = c("plotly", "htmlwidget"))
+  expect_s3_class(plot_output_rel$interactive_plot, class = c("plotly", "htmlwidget"))
+
+  # data is a data.frame (tibble)
+  expect_type(plot_output$data, type = "list")
+  expect_s3_class(plot_output$data, class = c("data.frame", "tbl_df"))
+  expect_type(plot_output_rel$data, type = "list")
+  expect_s3_class(plot_output_rel$data, class = c("data.frame", "tbl_df"))
+
+  # data contains only columns year, native_range, n, total and perc in this
+  # order
+  expect_equal(
+    names(plot_output$data),
+    c("year", "native_range", "n", "total", "perc")
+  )
+  expect_equal(
+    names(plot_output_rel$data),
+    c("year", "native_range", "n", "total", "perc")
+  )
+
+  # columns year and native_range of data slot are factors
+  expect_true(is.factor(plot_output$data$year))
+  expect_true(is.factor(plot_output$data$native_range))
+  # columns n and total of data slot are integers
+  expect_true(is.integer(plot_output$data$n))
+  expect_true(is.integer(plot_output$data$total))
+  # column perc of data slot is double
+  expect_true(is.double(plot_output$data$perc))
+
+  # data slot is not affected by value of related arg
+  expect_identical(plot_output$data, plot_output_rel$data)
+})