Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve indicator_native_range_year() #106

Merged
merged 9 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: trias
Title: Process Data for the Project Tracking Invasive Alien Species
(TrIAS)
Version: 2.0.5
Version: 2.0.6
Authors@R: c(
person("Damiano", "Oldoni", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3445-7562")),
Expand Down
102 changes: 79 additions & 23 deletions R/indicator_native_range_year.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@
#' Based on
#' [countYearProvince](https://github.com/inbo/reporting-rshiny-grofwildjacht/blob/exoten/reporting-grofwild/R/countYearProvince.R)
#' plot from reporting - rshiny - grofwildjacht
#' @param data input data.frame.
#' @param df input data.frame.
#' @param years (numeric) vector years we are interested to. If \code{NULL}
#' (default) all years from minimum and maximum of years of first observation
#' are taken into account.
#' @param type character, native_range level of interest should be one of
#' \code{c("native_continent", "native_range")}.
#' `c("native_range", "native_continent")`. Default: `"native_range"`. A
#' column called as the selected `type` must be present in `df`.
#' @param x_lab character string, label of the x-axis. Default: "year".
#' @param y_lab character string, label of the y-axis. Default: "number of alien
#' species".
#' @param relative (logical) if TRUE, each bar is standardised before stacking
#' @param first_observed (character) Name of the column in \code{data} containing temporal information about introduction of the alien species. Expressed as years.
#' @param relative (logical) if TRUE (default), each bar is standardised before
#' stacking.
#' @param first_observed (character) Name of the column in `data`
#' containing temporal information about introduction of the alien species.
#' Expressed as years.
#' @return list with: \itemize{ \item{'static_plot': }{ggplot object, for a
#' given species the observed number per year and per native range is plotted
#' in a stacked bar chart} \item{'interactive_plot': }{plotly object, for a
Expand All @@ -28,25 +32,81 @@
#' introduced from the native range for a given year. (n/total)*100} } } }
#' @export
#' @importFrom dplyr %>% .data

indicator_native_range_year <- function(data, years = NULL,
type = c("native_continent", "native_range"),
x_lab = "year",
y_lab = "alien species",
relative = FALSE,
first_observed = "first_observed") {
#' @examples
#' \dontrun{
#' library(readr)
#' datafile <- paste0(
#' "https://raw.githubusercontent.com/trias-project/indicators/master/data/",
#' "interim/data_input_checklist_indicators.tsv"
#' )
#' data <- read_tsv(datafile,
#' na = "",
#' col_types = cols(
#' .default = col_character(),
#' key = col_double(),
#' nubKey = col_double(),
#' speciesKey = col_double(),
#' first_observed = col_double(),
#' last_observed = col_double()
#' )
#' )
#' indicator_native_range_year(data, "native_continent", years = c(2010,2013))
#' }
indicator_native_range_year <- function(
df,
years = NULL,
type = c("native_range", "native_continent"),
x_lab = "year",
y_lab = "alien species",
relative = FALSE,
first_observed = "first_observed") {
# initial input checks
assertthat::assert_that(is.data.frame(df))
if (!is.null(years)) {
assertthat::assert_that(all(is.numeric(years)),
msg = "Argument years has to be a number."
)
assertthat::assert_that(
all(years < as.integer(format(Sys.Date(), "%Y"))),
msg = sprintf(
"All values in years has to be less than %s.", format(Sys.Date(), "%Y")
)
)
}
type <- match.arg(type)

assertthat::assert_that(type %in% names(df),
msg = sprintf("Column %s not present in df.", type)
)
if (!is.null(x_lab)) {
assertthat::assert_that(is.character(x_lab),
msg = "Argument x_lab has to be a character or NULL."
)
}
if (!is.null(y_lab)) {
assertthat::assert_that(is.character(y_lab),
msg = "Argument y_lab has to be a character or NULL."
)

}
assertthat::assert_that(is.logical(relative),
msg = "Argument relative has to be a logical."
)
assertthat::assert_that(is.character(first_observed),
msg = "Argument first_observed has to be a character."
)
assertable::assert_colnames(df, first_observed, only_colnames = FALSE)


# Rename to default column name
data <-
data %>%
df <-
df %>%
dplyr::rename_at(dplyr::vars(first_observed), ~"first_observed")

if (is.null(years)) {
years <- sort(unique(data$first_observed))
years <- sort(unique(df$first_observed))
}

plotData <- data
plotData <- df

plotData$location <- switch(type,
native_range = plotData$native_range,
Expand All @@ -56,17 +116,13 @@ indicator_native_range_year <- function(data, years = NULL,
# Select data
plotData <- plotData[plotData$first_observed %in% years, c("first_observed", "location")]
plotData <- plotData[!is.na(plotData$first_observed) & !is.na(plotData$location), ]

# Exclude unused provinces

# Set location and first_observed to factors
plotData$first_observed <- as.factor(plotData$first_observed)
plotData$location <- as.factor(plotData$location)
plotData$location <- droplevels(plotData$location)

# Summarize data per native_range and year
plotData$first_observed <- with(plotData, factor(first_observed,
levels =
min(years):max(years)
))

summaryData <- reshape2::melt(table(plotData), id.vars = "first_observed")
summaryData <- summaryData %>%
dplyr::group_by(.data$first_observed) %>%
Expand Down
37 changes: 31 additions & 6 deletions man/indicator_native_range_year.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
library(testthat)
library(purrr)
library(trias)

test_check("trias")
2 changes: 0 additions & 2 deletions tests/testthat/test-gbif_verify_keys.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#' @importFrom purrr map_chr
#'
context("test_gbif_verify_keys")

test_that("test several input types", {
Expand Down
140 changes: 140 additions & 0 deletions tests/testthat/test-indicator_native_range_year.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
context("test_indicator_native_range_year")

# test df
input_test_df <- read.delim(
test_path("data_test_input_graphs_indicators/test_visualization_df.tsv"),
sep = "\t"
)

nrow_no_first_obs <-
nrow(input_test_df[is.na(input_test_df$first_observed), ])

cleaned_input_test_df <- input_test_df[!is.na(input_test_df$first_observed), ]

test_that("Arg: df", {
expect_error(
indicator_native_range_year(3),
"df is not a data frame"
)
})


test_that("Arg: type", {
expect_error(
indicator_native_range_year(cleaned_input_test_df,
type = 2,
years = 2001),
"'arg' must be NULL or a character vector"
)
a <- cleaned_input_test_df
colnames(a)[colnames(a) == "native_range"] <- "l"
expect_error(
indicator_native_range_year(a,
type = "native_range",
years = 2001),
"Column native_range not present in df."
)
})


test_that("Arg: years", {
expect_error(
indicator_native_range_year(cleaned_input_test_df,
years = "2000"
),
"Argument years has to be a number."
)
expect_error(
indicator_native_range_year(cleaned_input_test_df,
years = c(2000,3000)
),
sprintf("All values in years has to be less than %s.", format(Sys.Date(), "%Y"))
)
})

test_that("Arg: first_observed", {
expect_error(
indicator_native_range_year(input_test_df,
years = 2010,
first_observed = 3),
"Argument first_observed has to be a character."
)
expect_error(
indicator_native_range_year(input_test_df,
year = 2010,
first_observed = "bad_colname"
)
)
})

test_that("Param: labels", {
expect_error(
indicator_native_range_year(input_test_df, x_lab = input_test_df),
"Argument x_lab has to be a character or NULL."
)
expect_error(
indicator_native_range_year(input_test_df, y_lab = 4),
"Argument y_lab has to be a character or NULL."
)
})

test_that("Test output type, class, slots and columns", {
plot_output <-
indicator_native_range_year(cleaned_input_test_df,
years = c(2000,2005)
)
plot_output_rel <-
indicator_native_range_year(cleaned_input_test_df,
years = c(2000,2005),
relative = TRUE
)
# output is a list
expect_type(plot_output, type = "list")
expect_type(plot_output_rel, type = "list")

# output has the right three slots
slots <- c("static_plot", "interactive_plot", "data")
expect_equal(names(plot_output), slots)
expect_equal(names(plot_output_rel), slots)

# static plot slot is a list with gg as class
expect_type(plot_output$static_plot, type = "list")
expect_type(plot_output_rel$static_plot, type = "list")
expect_s3_class(plot_output$static_plot, class = c("gg", "ggplot"))
expect_s3_class(plot_output_rel$static_plot, class = c("gg", "ggplot"))

# interactive plot slot is a list with plotly and htmlwidget as class
expect_type(plot_output$interactive_plot, type = "list")
expect_type(plot_output_rel$interactive_plot, type = "list")
expect_s3_class(plot_output$interactive_plot, class = c("plotly", "htmlwidget"))
expect_s3_class(plot_output_rel$interactive_plot, class = c("plotly", "htmlwidget"))

# data is a data.frame (tibble)
expect_type(plot_output$data, type = "list")
expect_s3_class(plot_output$data, class = c("data.frame", "tbl_df"))
expect_type(plot_output_rel$data, type = "list")
expect_s3_class(plot_output_rel$data, class = c("data.frame", "tbl_df"))

# data contains only columns year, native_range, n, total and perc in this
# order
expect_equal(
names(plot_output$data),
c("year", "native_range", "n", "total", "perc")
)
expect_equal(
names(plot_output_rel$data),
c("year", "native_range", "n", "total", "perc")
)

# columns year and native_range of data slot are factors
expect_true(is.factor(plot_output$data$year))
expect_true(is.factor(plot_output$data$native_range))
# columns n and total of data slot are integers
expect_true(is.integer(plot_output$data$n))
expect_true(is.integer(plot_output$data$total))
# column perc of data slot is double
expect_true(is.double(plot_output$data$perc))

# data slot is not affected by value of related arg
expect_identical(plot_output$data, plot_output_rel$data)
})