Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: dashboard #164

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ aux_data
.netlify
reports/*.html
reports/report.md
cache
cache
data
.vscode
1 change: 1 addition & 0 deletions R/aux_data_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ gen_ili_data <- function(default_day_of_week = 1) {
}

process_nhsn_data <- function(raw_nhsn_data) {
# TODO: Is this still needed?
# These are exception dates when the data was available on a different day
# than usual. In these two cases, it was the Thursday after. But to keep
# the rest of the pipeline the same, we pretend it was available on Wednesday.
Expand Down
19 changes: 3 additions & 16 deletions R/forecasters/formatters.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,28 +59,15 @@ format_covidhub <- function(pred, true_forecast_date, target_end_date, quantile_
format_flusight <- function(pred, disease = c("flu", "covid")) {
disease <- arg_match(disease)
pred %>%
add_state_info(geo_value_col = "geo_value", old_geo_code = "state_id", new_geo_code = "state_code") %>%
mutate(
reference_date = get_forecast_reference_date(forecast_date),
target = glue::glue("wk inc {disease} hosp"),
horizon = as.integer(floor((target_end_date - reference_date) / 7)),
output_type = "quantile",
output_type_id = quantile,
value = value
value = value,
location = state_code
) %>%
left_join(get_population_data() %>% select(state_id, state_code), by = c("geo_value" = "state_id")) %>%
mutate(location = state_code) %>%
select(reference_date, target, horizon, target_end_date, location, output_type, output_type_id, value)
}

#' The quantile levels used by the covidhub repository
#'
#' @param type either standard or inc_case, with inc_case being a small subset of the standard
#'
#' @return a numeric vector of quantile levels, rounded to 3 digits
#' @export
covidhub_probs <- function(type = c("standard", "inc_case")) {
  type <- match.arg(type)
  probs <- if (type == "standard") {
    c(0.01, 0.025, seq(0.05, 0.95, by = 0.05), 0.975, 0.99)
  } else {
    c(0.025, 0.100, 0.250, 0.500, 0.750, 0.900, 0.975)
  }
  round(probs, digits = 3)
}
63 changes: 54 additions & 9 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,47 @@ exclude_geos <- function(geo_forecasters_weights) {

`%nin%` <- function(x, y) !(x %in% y)

get_population_data <- function() {
readr::read_csv("https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_pop.csv", show_col_types = FALSE) %>%
rename(population = pop) %>%
# Add a row for the United States
bind_rows(
(.) %>% summarize(state_id = "us", population = sum(population), state_name = "United States", state_code = "US")
) %>%
# Duplicate the last row, but with state_id = "usa".
bind_rows((.) %>% filter(state_id == "us") %>% mutate(state_id = "usa"))
#' State population crosswalk, optionally with a national total row
#'
#' Reads the Delphi 2020 state population table (state_id, state_code,
#' state_name, population) and appends a national total row keyed by
#' state_id "us", "usa", or both.
#'
#' @param national_code which national geo id(s) to append: "both" (default),
#'   "us", or "usa"
#' @return a tibble with columns state_id, state_code, state_name, population
get_population_data <- function(national_code = c("both", "us", "usa")) {
  national_code <- rlang::arg_match(national_code)

  state_crosswalk <- readr::read_csv("https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_pop.csv", show_col_types = FALSE) %>%
    rename(population = pop)
  us_crosswalk <- state_crosswalk %>%
    summarize(state_id = "us", population = sum(population), state_name = "United States", state_code = "US")
  # BUGFIX: derive the "usa" row from us_crosswalk, not by filtering
  # state_crosswalk — the raw state table has no "us" row, so the previous
  # filter(state_id == "us") always produced an empty tibble and the "usa"
  # national row was silently dropped.
  usa_crosswalk <- us_crosswalk %>%
    mutate(state_id = "usa")

  # arg_match guarantees exactly one of the three values
  switch(national_code,
    us = bind_rows(state_crosswalk, us_crosswalk),
    usa = bind_rows(state_crosswalk, usa_crosswalk),
    both = bind_rows(state_crosswalk, us_crosswalk, usa_crosswalk)
  )
}

#' Add a state info column to a dataframe
#'
#' Joins `get_population_data()` onto `df` to translate one state geo coding
#' into another, appending the requested column while keeping all original
#' columns in their original order.
#'
#' @param df the dataframe to add the state info column to
#' @param geo_value_col the name of the column in df that contains the geo_value
#' @param old_geo_code the column of get_population_data() that matches the
#'   values in `geo_value_col`; one of: state_id, state_code, state_name.
#' @param new_geo_code the column of get_population_data() to append;
#'   the options are: state_id, state_code, state_name, population.
#'
#' @examples
#' # To add state_code to a dataframe with a geo_value column of state_ids (e.g. "ca")
#' df %>% add_state_info(geo_value_col = "geo_value", old_geo_code = "state_id", new_geo_code = "state_code")
#'
#' # To add state_id to a dataframe with a geo_value column of state_codes (e.g. "01")
#' df %>% add_state_info(geo_value_col = "location", old_geo_code = "state_code", new_geo_code = "state_id")
add_state_info <- function(df, geo_value_col, old_geo_code, new_geo_code) {
  original_colnames <- colnames(df)

  df %>%
    left_join(get_population_data(), by = join_by(!!geo_value_col == !!old_geo_code)) %>%
    # all_of() marks these as external character vectors; bare vectors in
    # select() are deprecated and ambiguous with column names
    select(all_of(original_colnames), all_of(new_geo_code))
}

filter_forecast_geos <- function(forecasts, truth_data) {
Expand Down Expand Up @@ -278,6 +310,19 @@ write_submission_file <- function(pred, forecast_reference_date, submission_dire
readr::write_csv(pred, file_path)
}

#' The quantile levels used by the covidhub repository
#'
#' @param type either standard or inc_case, with inc_case being a small subset of the standard
#'
#' @return a numeric vector of quantile levels, rounded to 3 digits
#' @export
covidhub_probs <- function(type = c("standard", "inc_case")) {
  type <- match.arg(type)
  quantile_sets <- list(
    standard = c(0.01, 0.025, seq(0.05, 0.95, by = 0.05), 0.975, 0.99),
    inc_case = c(0.025, 0.100, 0.250, 0.500, 0.750, 0.900, 0.975)
  )
  round(quantile_sets[[type]], digits = 3)
}

#' Utility to get the reference date for a given date. This is the last day of
#' the epiweek that the date falls in.
get_forecast_reference_date <- function(date) {
Expand Down
4 changes: 4 additions & 0 deletions _targets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ covid_hosp_prod:
script: scripts/covid_hosp_prod.R
store: covid_hosp_prod
use_crew: no
dashboard-proj:
script: scripts/dashboard-proj.R
store: dashboard-proj
use_crew: no
10 changes: 10 additions & 0 deletions dashboard-proj/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# CAUTION: do not edit this file by hand!
# _targets/objects/ may have large data files,
# and _targets/meta/process may have sensitive information.
# It is good practice to either commit nothing from _targets/,
# or if your data is not too sensitive,
# commit only _targets/meta/meta.
*
!.gitignore
!meta
meta/*
140 changes: 140 additions & 0 deletions get_forecast_data.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
library(tidyverse)
library(httr)
library(lubridate)
library(progress)

options(readr.show_progress = FALSE)
options(readr.show_col_types = FALSE)


# Configuration
# Central settings for this download script:
#  - base_url: raw-GitHub root of the FluSight-forecast-hub model-output tree
#  - forecasters: model names whose weekly submissions are mirrored locally
#  - local_storage: directory tree where downloaded csv files are written
#  - tracking_file: csv log of download attempts
#    (columns: forecaster, filename, download_date, status)
config <- list(
  base_url = "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/main/model-output",
  forecasters = c("CMU-TimeSeries", "FluSight-baseline", "FluSight-ensemble", "FluSight-base_seasonal", "UMass-flusion"),
  local_storage = "data/forecasts",
  tracking_file = "data/download_tracking.csv"
)

# Function to ensure directory structure exists
# Function to ensure directory structure exists
#
# Creates the local storage root plus one subdirectory per forecaster.
# Safe to call repeatedly: existing directories are left untouched
# (showWarnings = FALSE suppresses the "already exists" warning).
#
# @param base_dir root directory for downloaded forecast files
# @param forecasters character vector of forecaster subdirectory names;
#   defaults to the globally configured list for backward compatibility
setup_directories <- function(base_dir, forecasters = config$forecasters) {
  dir.create(file.path(base_dir), recursive = TRUE, showWarnings = FALSE)
  for (forecaster in forecasters) {
    dir.create(file.path(base_dir, forecaster), recursive = TRUE, showWarnings = FALSE)
  }
}

# Function to load tracking data
# Function to load tracking data
#
# Loads the download tracking log, or returns an empty log with the expected
# schema on first run.
#
# @param tracking_file path to the tracking csv; defaults to the configured one
# @return a tibble with character columns forecaster, filename,
#   download_date, status
load_tracking_data <- function(tracking_file = config$tracking_file) {
  if (file.exists(tracking_file)) {
    # Read every column as character: download_date is stored via
    # as.character(Sys.time()), so letting read_csv guess a datetime type
    # would make the later bind_rows() with new character rows fail.
    read_csv(tracking_file, col_types = cols(.default = col_character()))
  } else {
    tibble(
      forecaster = character(),
      filename = character(),
      download_date = character(),
      status = character()
    )
  }
}

# Function to generate possible filenames for a date range
generate_filenames <- function(start_date, end_date, forecaster) {
dates <- seq(as_date(start_date), as_date(end_date), by = "week")
filenames <- paste0(
format(dates, "%Y-%m-%d"),
"-",
forecaster,
".csv"
)
return(filenames)
}

# Function to check if file exists on GitHub
check_github_file <- function(forecaster, filename) {
url <- paste0(config$base_url, "/", forecaster, "/", filename)
response <- GET(url)
return(status_code(response) == 200)
}

# Function to download a single file
download_forecast_file <- function(forecaster, filename) {
url <- paste0(config$base_url, "/", forecaster, "/", filename)
local_path <- file.path(config$local_storage, forecaster, filename)

tryCatch(
{
download.file(url, local_path, mode = "wb", quiet = TRUE)
return("success")
},
error = function(e) {
return("failed")
}
)
}

# Main function to update forecast files
update_forecast_files <- function(days_back = 30) {
# Setup
setup_directories(config$local_storage)
tracking_data <- load_tracking_data()

# Generate date range
end_date <- Sys.Date()
start_date <- get_forecast_reference_date(end_date - days_back)

# Process each forecaster
new_tracking_records <- list()

pb_forecasters <- progress_bar$new(
format = "Downloading forecasts from :forecaster [:bar] :percent :eta",
total = length(config$forecasters),
clear = FALSE,
width = 60
)

for (forecaster in config$forecasters) {
pb_forecasters$tick(tokens = list(forecaster = forecaster))

# Get potential filenames
filenames <- generate_filenames(start_date, end_date, forecaster)

# Filter out already downloaded files
existing_files <- tracking_data %>%
filter(forecaster == !!forecaster, status == "success") %>%
pull(filename)

new_files <- setdiff(filenames, existing_files)

if (length(new_files) > 0) {
# Create nested progress bar for files
pb_files <- progress_bar$new(
format = " Downloading files [:bar] :current/:total :filename",
total = length(new_files)
)

for (filename in new_files) {
pb_files$tick(tokens = list(filename = filename))

if (check_github_file(forecaster, filename)) {
status <- download_forecast_file(forecaster, filename)

new_tracking_records[[length(new_tracking_records) + 1]] <- tibble(
forecaster = forecaster,
filename = filename,
download_date = as.character(Sys.time()),
status = status
)
}
}
}
}

# Update tracking data
if (length(new_tracking_records) > 0) {
new_tracking_data <- bind_rows(new_tracking_records)
tracking_data <- bind_rows(tracking_data, new_tracking_data)
write_csv(tracking_data, config$tracking_file)
}

return(tracking_data)
}
Empty file added scripts/dashboard-proj.R
Empty file.
Loading