Merge pull request #3 from FluSightNetwork/master
update my files
tkcy authored Oct 27, 2017
2 parents dc594e5 + 5a38051 commit 568e55c
Showing 9 changed files with 3,948 additions and 3,829 deletions.
5 changes: 0 additions & 5 deletions .travis.yml
@@ -20,9 +20,4 @@ env:
global:
- ENCRYPTION_LABEL: "ae5ecd417952"

before_install:
- sudo Rscript -e "install.packages('devtools', repos='http://cran.us.r-project.org')"
- sudo Rscript -e "devtools::install_github('hrbrmstr/cdcfluview')"
- sudo Rscript -e "devtools::install_github('jarad/FluSight')"

script: bash ./travis-main.sh
2 changes: 1 addition & 1 deletion flusight-deploy/0-init-flusight.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# Script to downlaod and setup flusight directory structure
# Script to download and setup flusight directory structure
set -e

# Download flusight master
7,582 changes: 3,791 additions & 3,791 deletions scores/scores.csv

Large diffs are not rendered by default.

102 changes: 86 additions & 16 deletions scripts/check-ensemble-scores.R
@@ -14,7 +14,6 @@ truth_condense <- function(year) {
expand_truth(week_expand = 1, percent_expand = 5)
}


exp_truth_2010 <- truth_condense(2010)
exp_truth_2011 <- truth_condense(2011)
exp_truth_2012 <- truth_condense(2012)
@@ -23,6 +22,33 @@ exp_truth_2014 <- truth_condense(2014)
exp_truth_2015 <- truth_condense(2015)
exp_truth_2016 <- truth_condense(2016)

# Pull in truth based on week 28 values in given year
obs_truth <- read.csv("../scores/target-multivals.csv",
stringsAsFactors = F)

expand_old_truth <- function(season) {
obs_truth %>%
filter(Season == season & (Calendar.Week >= 43 | Calendar.Week <= 18)) %>%
mutate(forecast_week = ifelse(Target %in% c("Season onset", "Season peak week",
"Season peak percentage"),
NA,
Calendar.Week)) %>%
rename(location = Location, target = Target, bin_start_incl = Valid.Bin_start_incl) %>%
distinct(location, target, forecast_week, bin_start_incl) %>%
{if (season == "2014/2015") expand_truth(., week53 = T) else expand_truth(.) }

}

obs_exp_truth_2010 <- expand_old_truth("2010/2011")
obs_exp_truth_2011 <- expand_old_truth("2011/2012")
obs_exp_truth_2012 <- expand_old_truth("2012/2013")
obs_exp_truth_2013 <- expand_old_truth("2013/2014")
obs_exp_truth_2014 <- expand_old_truth("2014/2015")
obs_exp_truth_2015 <- expand_old_truth("2015/2016")
obs_exp_truth_2016 <- expand_old_truth("2016/2017")



# Pull in csvs from ensembles

read_all_entries <- function(model) {
@@ -54,20 +80,20 @@ ensemble_scores <- function(model) {
entries <- read_all_entries(model)

scores <- list()
scores[["2010/2011"]] <- purrr::map(entries[["2010/2011"]],
score_entry, exp_truth_2010)
scores[["2011/2012"]] <- purrr::map(entries[["2011/2012"]],
score_entry, exp_truth_2011)
scores[["2012/2013"]] <- purrr::map(entries[["2012/2013"]],
score_entry, exp_truth_2012)
scores[["2013/2014"]] <- purrr::map(entries[["2013/2014"]],
score_entry, exp_truth_2013)
scores[["2014/2015"]] <- purrr::map(entries[["2014/2015"]],
score_entry, exp_truth_2014)
scores[["2015/2016"]] <- purrr::map(entries[["2015/2016"]],
score_entry, exp_truth_2015)
scores[["2016/2017"]] <- purrr::map(entries[["2016/2017"]],
score_entry, exp_truth_2016)
scores[["2010/2011"]] <- map(entries[["2010/2011"]],
score_entry, obs_exp_truth_2010)
scores[["2011/2012"]] <- map(entries[["2011/2012"]],
score_entry, obs_exp_truth_2011)
scores[["2012/2013"]] <- map(entries[["2012/2013"]],
score_entry, obs_exp_truth_2012)
scores[["2013/2014"]] <- map(entries[["2013/2014"]],
score_entry, obs_exp_truth_2013)
scores[["2014/2015"]] <- map(entries[["2014/2015"]],
score_entry, obs_exp_truth_2014)
scores[["2015/2016"]] <- map(entries[["2015/2016"]],
score_entry, obs_exp_truth_2015)
scores[["2016/2017"]] <- map(entries[["2016/2017"]],
score_entry, obs_exp_truth_2016)

all_scores <- bind_rows(map(scores, bind_rows), .id = "season")

@@ -78,4 +104,48 @@ constant_weight_scores <- ensemble_scores("constant-weights")
equal_weight_scores <- ensemble_scores("equal-weights")
target_region_scores <- ensemble_scores("target-and-region-based-weights")
target_scores <- ensemble_scores("target-based-weights")
target_type_scores <- ensemble_scores("target-type-based-weights")
target_type_scores <- ensemble_scores("target-type-based-weights")

# Create boundaries for scores that we're interested in
all_ensemble_scores <- bind_rows(
constant_weight_scores %>% mutate(Model = "FSNetwork-CW"),
equal_weight_scores %>% mutate(Model = "FSNetwork-EW"),
target_region_scores %>% mutate(Model = "FSNetwork-TRW"),
target_scores %>% mutate(Model = "FSNetwork-TW"),
target_type_scores %>% mutate(Model = "FSNetwork-TTW")
) %>%
rename(Season = season, Location = location, Target = target,
FluSight_score = score, Epiweek = forecast_week) %>%
mutate(Model.Week = ifelse(Season == "2014/2015",
ifelse(Epiweek < 40, Epiweek + 53, Epiweek),
ifelse(Epiweek < 40, Epiweek + 52, Epiweek)))

# Compare Travis scores to FluSight scores
compare_scores <- all_ensemble_scores %>%
left_join(travis_scores, by = c("Season", "Location", "Target",
"Epiweek", "Model.Week", "Model")) %>%
select(Season, Location, Target, Epiweek, Model, FluSight_score, Multi.bin.score) %>%
mutate(diff = FluSight_score - Multi.bin.score)

# Print scores that differ by more than 1e-12
different_score <- compare_scores %>%
filter(diff > 1e-12)

table(different_score$Model)
table(different_score$Target)
table(different_score$Location)
table(different_score$Epiweek)
table(different_score$Season)

# All differences in 2014/15 peak week
obs_exp_truth_2014 %>%
filter(target == "Season peak week", location == "HHS Region 3") %>%
distinct

obs_truth %>%
filter(Season == "2014/2015", Target == "Season peak week") %>%
distinct(Season, Target, Location, Valid.Bin_start_incl)

compare_scores %>%
filter(Season == "2014/2015", Target == "Season peak week",
Location == "HHS Region 3", Epiweek == 1)
36 changes: 20 additions & 16 deletions scripts/generate-scores.js
100644 → 100755
@@ -51,22 +51,25 @@ const getTrueData = truthFile => {
}

/**
* Not exactly linspace
* Return a season string for given time data
*/
const arange = (start, end, gap) => {
let out = [start]
while (out[out.length - 1] !== end) {
out.push(out[out.length - 1] + gap)
}
return out
const getSeason = (year, epiweek) => {
return (epiweek < 40) ? `${year-1}/${year}` : `${year}/${year+1}`
}

/**
* Return a neighbouring region of 1 bin around a given week
* Tell the last week (52/53) for given time data
*/
const weekNeighbours = (binStart, year) => {
let lastWeek = (new mmwr.MMWRDate(year, 1)).nWeeks
const getLastWeek = (year, epiweek) => {
let seasonFirstYear = parseInt(getSeason(year, epiweek).split('/')[0])
return (new mmwr.MMWRDate(seasonFirstYear)).nWeeks
}

/**
* Return a neighbouring region of 1 bin around a given week
*/
const weekNeighbours = (binStart, year, epiweek) => {
let lastWeek = getLastWeek(year, epiweek)
// Handle edge cases
if (binStart === 40) {
// We are at the beginning of the season
@@ -76,7 +79,7 @@ const weekNeighbours = (binStart, year) => {
// The next bin is 1
return [binStart - 1, binStart, 1]
} else if (binStart === 1) {
return [(new mmwr.MMWRDate(year - 1, 1)).nWeeks, binStart, 2]
return [lastWeek, binStart, 2]
} else {
// This is regular case
return [binStart - 1, binStart, binStart + 1]
@@ -86,12 +89,12 @@
/**
* Return expanded set of binStarts for given bin value and target type
*/
const expandBinStarts = (binStarts, targetType, year) => {
const expandBinStarts = (binStarts, targetType, year, epiweek) => {
if (targetType.endsWith('ahead') || targetType.endsWith('percentage')) {
// This is a percentage target
return util.unique(binStarts.reduce((acc, binStart) => {
return acc.concat(
arange(-0.5, 0.5, 0.1)
util.arange(-0.5, 0.5, 0.1)
.map(diff => binStart + diff)
.map(bs => Math.round(bs * 10) / 10) // Round to get just one place decimal
.filter(bs => (bs >= 0.0 - Number.EPSILON) && (bs <= 13.0 + Number.EPSILON))
@@ -100,7 +103,7 @@ const expandBinStarts = (binStarts, targetType, year) => {
} else {
// This is a week target
let uniqueBinStarts = util.unique(binStarts.reduce((acc, binStart) => {
return acc.concat(weekNeighbours(binStart, year).map(bs => Math.round(bs)))
return acc.concat(weekNeighbours(binStart, year, epiweek).map(bs => Math.round(bs)))
}, []))

// If every one is NaN, then just return one NaN
@@ -156,6 +159,7 @@ let outputLines = [header.join(',')]
let errorLogLines = []
let errorBlacklistLines = []
let trueData = getTrueData(truthFile)
let csvData

// NOTE: For scores, we only consider these two directories
models.getModelDirs(
@@ -170,12 +174,12 @@
csvs.forEach(csvFile => {
let {year, epiweek} = models.getCsvTime(csvFile)
try {
let csvData = getCsvData(csvFile)
csvData = getCsvData(csvFile)
meta.regions.forEach(region => {
meta.targets.forEach(target => {
let trueTargets = trueData[year][epiweek][region][target]
let trueBinStarts = trueTargets.map(tt => parseFloat(tt[6]))
let expandedTrueBinStarts = expandBinStarts(trueBinStarts, target, year)
let expandedTrueBinStarts = expandBinStarts(trueBinStarts, target, parseInt(year), parseInt(epiweek))
let season = trueTargets[0][2]
let modelWeek = trueTargets[0][3]
let modelProbabilities = csvData[region][target]
9 changes: 9 additions & 0 deletions scripts/modules/util.js
@@ -44,7 +44,16 @@ const readYamlFile = fileName => {
return yaml.safeLoad(fs.readFileSync(fileName, 'utf8'))
}

const arange = (start, end, gap) => {
let len = 1 + ((end - start) / gap)
return [...Array(len).keys()].map(i => start + gap * i)
}

const isClose = (a, b, tol = Number.EPSILON) => Math.abs(a - b) < tol

module.exports.isSubset = isSubset
module.exports.unique = unique
module.exports.writeLines = writeLines
module.exports.readYamlFile = readYamlFile
module.exports.arange = arange
module.exports.isClose = isClose
30 changes: 30 additions & 0 deletions scripts/report.Rmd
@@ -0,0 +1,30 @@
---
title: 'Combining Mechanistic and Statistical Models to Forecast Influenza in the U.S.:
A Collaborative Ensemble from the FluSight Network'
author: Nicholas G Reich, Logan Brooks, Abhinav Tushar, Teresa Yamana, Craig McGowan,
Evan Ray, Dave Osthus, Roni Rosenfeld
date: "10/26/2017"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Overview
In the 2016/2017 influenza season, the CDC ran the 4th annual FluSight competition and received XX submissions from XX teams. During the season, analysts at the CDC built an ensemble model that combined all of the submitted models by taking the "average" forecast for each influenza target. This model was one of the top-performing models for the entire season.
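
The averaging step itself is simple: for each target, the ensemble probability assigned to a bin is the mean of the component models' probabilities for that bin. A minimal sketch, assuming two hypothetical component models and a three-bin target (illustrative values, not the CDC's code):

```r
# Minimal sketch of an equal-weight ("average") ensemble; hypothetical models
# and probabilities, not the CDC implementation.
library(dplyr)

component_forecasts <- data.frame(
  model          = rep(c("ModelA", "ModelB"), each = 3),
  bin_start_incl = rep(c(1.0, 1.1, 1.2), times = 2),
  value          = c(0.2, 0.5, 0.3,   # ModelA bin probabilities
                     0.4, 0.4, 0.2)   # ModelB bin probabilities
)

equal_weight_ensemble <- component_forecasts %>%
  group_by(bin_start_incl) %>%
  summarise(value = mean(value))

equal_weight_ensemble
# bin probabilities become 0.30, 0.45, 0.25 and still sum to 1
```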

In March 2017 the FluSight Network was founded to create a collaborative network of influenza forecasters. This group worked throughout 2017 to create a set of guidelines and an experimental design that would enable a publicly available, multi-team, real-time submission of an ensemble model with validated, performance-based weights for each model (i.e., not a simple average of models).
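
One way to make "performance-based weights" concrete, purely as an illustration and not the procedure used to fit the FluSight Network ensembles, is to weight each model in proportion to its historical forecast skill:

```r
# Illustrative sketch only: weights proportional to exp(mean log score) on
# previously scored seasons. Model names and scores are hypothetical, and this
# is not the FluSight Network's actual weight-estimation method.
library(dplyr)

past_skill <- data.frame(
  model          = c("ModelA", "ModelB", "ModelC"),
  mean_log_score = c(-1.2, -1.8, -2.5)   # less negative = better forecasts
)

weights <- past_skill %>%
  mutate(weight = exp(mean_log_score) / sum(exp(mean_log_score)))

weights
# weight ≈ 0.55, 0.30, 0.15 (sums to 1)
```

Combining component forecasts then proceeds as in the equal-weight sketch above, except with `summarise(value = sum(value * weight))` after joining the weights, instead of a plain mean.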

This document provides an executive summary of that effort, highlighting the results and documenting the chosen model that was designated for real-time submission during the 2017/2018 U.S. influenza season.

Institution | No. of models | Team leaders
----------- | ------------- | -------------
UMass-Amherst | 3 | Nicholas Reich, Abhinav Tushar, Evan Ray
Carnegie Mellon | XX | Logan Brooks, Roni Rosenfeld
Columbia University | XX | Teresa Yamana, Jeff Shaman
Los Alamos National Laboratories | 1 | Dave Osthus

## Selected Ensemble Model for Real-time Submissions

The model selected for real-time submissions is the model that performed
8 changes: 8 additions & 0 deletions setup-R-packages.sh
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

# Script to setup packages required for R code
set -e

sudo Rscript -e "install.packages('devtools', repos='http://cran.us.r-project.org')"
sudo Rscript -e "devtools::install_github('hrbrmstr/cdcfluview')"
sudo Rscript -e "devtools::install_github('jarad/FluSight')"
3 changes: 3 additions & 0 deletions travis-main.sh
@@ -50,6 +50,9 @@ git add ./model-forecasts/component-models/model-id-map.csv
git diff-index --quiet HEAD || git commit -m "autogenerated csvs"
git push $SSH_REPO HEAD:master

# Setup R now
bash ./setup-R-packages.sh

echo "> Building visualizer"
# Go back and build flusight
git checkout gh-pages || git checkout --orphan gh-pages