Skip to content

Commit

Permalink
Merge pull request #106 from trias-project/Replace-data-in-function
Browse files Browse the repository at this point in the history
Improve indicator_native_range_year()
  • Loading branch information
damianooldoni authored Mar 29, 2023
2 parents 94ec160 + 7221ea2 commit 639b224
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 32 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: trias
Title: Process Data for the Project Tracking Invasive Alien Species
(TrIAS)
Version: 2.0.5
Version: 2.0.6
Authors@R: c(
person("Damiano", "Oldoni", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3445-7562")),
Expand Down
102 changes: 79 additions & 23 deletions R/indicator_native_range_year.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@
#' Based on
#' [countYearProvince](https://github.com/inbo/reporting-rshiny-grofwildjacht/blob/exoten/reporting-grofwild/R/countYearProvince.R)
#' plot from reporting - rshiny - grofwildjacht
#' @param data input data.frame.
#' @param df input data.frame.
#' @param years (numeric) vector years we are interested to. If \code{NULL}
#' (default) all years from minimum and maximum of years of first observation
#' are taken into account.
#' @param type character, native_range level of interest should be one of
#' \code{c("native_continent", "native_range")}.
#' `c("native_range", "native_continent")`. Default: `"native_range"`. A
#' column called as the selected `type` must be present in `df`.
#' @param x_lab character string, label of the x-axis. Default: "year".
#' @param y_lab character string, label of the y-axis. Default: "number of alien
#' species".
#' @param relative (logical) if TRUE, each bar is standardised before stacking
#' @param first_observed (character) Name of the column in \code{data} containing temporal information about introduction of the alien species. Expressed as years.
#' @param relative (logical) if TRUE (default), each bar is standardised before
#' stacking.
#' @param first_observed (character) Name of the column in `data`
#' containing temporal information about introduction of the alien species.
#' Expressed as years.
#' @return list with: \itemize{ \item{'static_plot': }{ggplot object, for a
#' given species the observed number per year and per native range is plotted
#' in a stacked bar chart} \item{'interactive_plot': }{plotly object, for a
Expand All @@ -28,25 +32,81 @@
#' introduced from the native range for a given year. (n/total)*100} } } }
#' @export
#' @importFrom dplyr %>% .data

indicator_native_range_year <- function(data, years = NULL,
type = c("native_continent", "native_range"),
x_lab = "year",
y_lab = "alien species",
relative = FALSE,
first_observed = "first_observed") {
#' @examples
#' \dontrun{
#' library(readr)
#' datafile <- paste0(
#' "https://raw.githubusercontent.com/trias-project/indicators/master/data/",
#' "interim/data_input_checklist_indicators.tsv"
#' )
#' data <- read_tsv(datafile,
#' na = "",
#' col_types = cols(
#' .default = col_character(),
#' key = col_double(),
#' nubKey = col_double(),
#' speciesKey = col_double(),
#' first_observed = col_double(),
#' last_observed = col_double()
#' )
#' )
#' indicator_native_range_year(data, "native_continent", years = c(2010,2013))
#' }
indicator_native_range_year <- function(
df,
years = NULL,
type = c("native_range", "native_continent"),
x_lab = "year",
y_lab = "alien species",
relative = FALSE,
first_observed = "first_observed") {
# initial input checks
assertthat::assert_that(is.data.frame(df))
if (!is.null(years)) {
assertthat::assert_that(all(is.numeric(years)),
msg = "Argument years has to be a number."
)
assertthat::assert_that(
all(years < as.integer(format(Sys.Date(), "%Y"))),
msg = sprintf(
"All values in years has to be less than %s.", format(Sys.Date(), "%Y")
)
)
}
type <- match.arg(type)

assertthat::assert_that(type %in% names(df),
msg = sprintf("Column %s not present in df.", type)
)
if (!is.null(x_lab)) {
assertthat::assert_that(is.character(x_lab),
msg = "Argument x_lab has to be a character or NULL."
)
}
if (!is.null(y_lab)) {
assertthat::assert_that(is.character(y_lab),
msg = "Argument y_lab has to be a character or NULL."
)

}
assertthat::assert_that(is.logical(relative),
msg = "Argument relative has to be a logical."
)
assertthat::assert_that(is.character(first_observed),
msg = "Argument first_observed has to be a character."
)
assertable::assert_colnames(df, first_observed, only_colnames = FALSE)


# Rename to default column name
data <-
data %>%
df <-
df %>%
dplyr::rename_at(dplyr::vars(first_observed), ~"first_observed")

if (is.null(years)) {
years <- sort(unique(data$first_observed))
years <- sort(unique(df$first_observed))
}

plotData <- data
plotData <- df

plotData$location <- switch(type,
native_range = plotData$native_range,
Expand All @@ -56,17 +116,13 @@ indicator_native_range_year <- function(data, years = NULL,
# Select data
plotData <- plotData[plotData$first_observed %in% years, c("first_observed", "location")]
plotData <- plotData[!is.na(plotData$first_observed) & !is.na(plotData$location), ]

# Exclude unused provinces

# Set location and first_observed to factors
plotData$first_observed <- as.factor(plotData$first_observed)
plotData$location <- as.factor(plotData$location)
plotData$location <- droplevels(plotData$location)

# Summarize data per native_range and year
plotData$first_observed <- with(plotData, factor(first_observed,
levels =
min(years):max(years)
))

summaryData <- reshape2::melt(table(plotData), id.vars = "first_observed")
summaryData <- summaryData %>%
dplyr::group_by(.data$first_observed) %>%
Expand Down
37 changes: 31 additions & 6 deletions man/indicator_native_range_year.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
library(testthat)
library(purrr)
library(trias)

test_check("trias")
2 changes: 0 additions & 2 deletions tests/testthat/test-gbif_verify_keys.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#' @importFrom purrr map_chr
#'
context("test_gbif_verify_keys")

test_that("test several input types", {
Expand Down
140 changes: 140 additions & 0 deletions tests/testthat/test-indicator_native_range_year.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
context("test_indicator_native_range_year")

# test df
input_test_df <- read.delim(
test_path("data_test_input_graphs_indicators/test_visualization_df.tsv"),
sep = "\t"
)

nrow_no_first_obs <-
nrow(input_test_df[is.na(input_test_df$first_observed), ])

cleaned_input_test_df <- input_test_df[!is.na(input_test_df$first_observed), ]

test_that("Arg: df", {
expect_error(
indicator_native_range_year(3),
"df is not a data frame"
)
})


test_that("Arg: type", {
expect_error(
indicator_native_range_year(cleaned_input_test_df,
type = 2,
years = 2001),
"'arg' must be NULL or a character vector"
)
a <- cleaned_input_test_df
colnames(a)[colnames(a) == "native_range"] <- "l"
expect_error(
indicator_native_range_year(a,
type = "native_range",
years = 2001),
"Column native_range not present in df."
)
})


test_that("Arg: years", {
expect_error(
indicator_native_range_year(cleaned_input_test_df,
years = "2000"
),
"Argument years has to be a number."
)
expect_error(
indicator_native_range_year(cleaned_input_test_df,
years = c(2000,3000)
),
sprintf("All values in years has to be less than %s.", format(Sys.Date(), "%Y"))
)
})

test_that("Arg: first_observed", {
expect_error(
indicator_native_range_year(input_test_df,
years = 2010,
first_observed = 3),
"Argument first_observed has to be a character."
)
expect_error(
indicator_native_range_year(input_test_df,
year = 2010,
first_observed = "bad_colname"
)
)
})

test_that("Param: labels", {
expect_error(
indicator_native_range_year(input_test_df, x_lab = input_test_df),
"Argument x_lab has to be a character or NULL."
)
expect_error(
indicator_native_range_year(input_test_df, y_lab = 4),
"Argument y_lab has to be a character or NULL."
)
})

test_that("Test output type, class, slots and columns", {
plot_output <-
indicator_native_range_year(cleaned_input_test_df,
years = c(2000,2005)
)
plot_output_rel <-
indicator_native_range_year(cleaned_input_test_df,
years = c(2000,2005),
relative = TRUE
)
# output is a list
expect_type(plot_output, type = "list")
expect_type(plot_output_rel, type = "list")

# output has the right three slots
slots <- c("static_plot", "interactive_plot", "data")
expect_equal(names(plot_output), slots)
expect_equal(names(plot_output_rel), slots)

# static plot slot is a list with gg as class
expect_type(plot_output$static_plot, type = "list")
expect_type(plot_output_rel$static_plot, type = "list")
expect_s3_class(plot_output$static_plot, class = c("gg", "ggplot"))
expect_s3_class(plot_output_rel$static_plot, class = c("gg", "ggplot"))

# interactive plot slot is a list with plotly and htmlwidget as class
expect_type(plot_output$interactive_plot, type = "list")
expect_type(plot_output_rel$interactive_plot, type = "list")
expect_s3_class(plot_output$interactive_plot, class = c("plotly", "htmlwidget"))
expect_s3_class(plot_output_rel$interactive_plot, class = c("plotly", "htmlwidget"))

# data is a data.frame (tibble)
expect_type(plot_output$data, type = "list")
expect_s3_class(plot_output$data, class = c("data.frame", "tbl_df"))
expect_type(plot_output_rel$data, type = "list")
expect_s3_class(plot_output_rel$data, class = c("data.frame", "tbl_df"))

# data contains only columns year, native_range, n, total and perc in this
# order
expect_equal(
names(plot_output$data),
c("year", "native_range", "n", "total", "perc")
)
expect_equal(
names(plot_output_rel$data),
c("year", "native_range", "n", "total", "perc")
)

# columns year and native_range of data slot are factors
expect_true(is.factor(plot_output$data$year))
expect_true(is.factor(plot_output$data$native_range))
# columns n and total of data slot are integers
expect_true(is.integer(plot_output$data$n))
expect_true(is.integer(plot_output$data$total))
# column perc of data slot is double
expect_true(is.double(plot_output$data$perc))

# data slot is not affected by value of related arg
expect_identical(plot_output$data, plot_output_rel$data)
})

0 comments on commit 639b224

Please sign in to comment.