diff --git a/.github/workflows/pipeline-run-check.yaml b/.github/workflows/pipeline-run-check.yaml index 2b88e5432..9fcd43ac6 100644 --- a/.github/workflows/pipeline-run-check.yaml +++ b/.github/workflows/pipeline-run-check.yaml @@ -60,7 +60,7 @@ jobs: - name: Install Julia dependencies run: | - julia --project=EpiAutoGP -e 'using Pkg; Pkg.instantiate()' + julia --project=pipelines/epiautogp -e 'using Pkg; Pkg.instantiate()' - name: Download test data uses: actions/download-artifact@v8 @@ -103,7 +103,7 @@ jobs: - name: Install Julia dependencies run: | - julia --project=EpiAutoGP -e 'using Pkg; Pkg.instantiate()' + julia --project=pipelines/epiautogp -e 'using Pkg; Pkg.instantiate()' - name: Download test data uses: actions/download-artifact@v8 @@ -146,7 +146,7 @@ jobs: - name: Install Julia dependencies run: | - julia --project=EpiAutoGP -e 'using Pkg; Pkg.instantiate()' + julia --project=pipelines/epiautogp -e 'using Pkg; Pkg.instantiate()' - name: Download test data uses: actions/download-artifact@v8 @@ -189,7 +189,7 @@ jobs: - name: Install Julia dependencies run: | - julia --project=EpiAutoGP -e 'using Pkg; Pkg.instantiate()' + julia --project=pipelines/epiautogp -e 'using Pkg; Pkg.instantiate()' - name: Download test data uses: actions/download-artifact@v8 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c4875082d..45d051e30 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -30,6 +30,7 @@ jobs: run: | uv run pytest \ --ignore=pipelines/tests/test_epiautogp_parquet_interop.py \ + --ignore=pipelines/tests/test_epiautogp_parquet.py \ --cov=pipelines --cov-report=term --cov-report=xml:pipelines-coverage.xml . 
- name: Upload results to Codecov @@ -88,7 +89,7 @@ jobs: path: ${{ runner.temp }}/package epiautogp-test: - name: Run EpiAutoGP tests and collect coverage + name: Run EpiAutoGP runner output tests runs-on: ubuntu-latest steps: - name: Checkout @@ -99,26 +100,9 @@ jobs: with: version: "1.11" - - name: Run tests - uses: julia-actions/julia-runtest@v1 - with: - project: EpiAutoGP - coverage: true - - - name: Process coverage - uses: julia-actions/julia-processcoverage@v1 - with: - directories: EpiAutoGP/src - - - name: Upload results to Codecov - uses: codecov/codecov-action@v6 - with: - env_vars: OS,PYTHON - fail_ci_if_error: true - flags: epiautogp - files: lcov.info - token: ${{ secrets.CODECOV_TOKEN }} - verbose: true + - name: Install Julia dependencies + run: | + julia --project=pipelines/epiautogp -e 'using Pkg; Pkg.instantiate()' - name: Set up R with hewr uses: ./.github/actions/setup-hewr @@ -126,9 +110,9 @@ jobs: - name: Set up Pyrenew-HEW via UV uses: ./.github/actions/setup-pyrenew-hew - - name: Instantiate EpiAutoGP Julia project + - name: Run direct EpiAutoGP parquet test run: | - julia --project=EpiAutoGP -e 'using Pkg; Pkg.instantiate()' + uv run pytest pipelines/tests/test_epiautogp_parquet.py -q - name: Run EpiAutoGP parquet interop test run: | diff --git a/.gitignore b/.gitignore index f5e8c196d..37a8c18f9 100644 --- a/.gitignore +++ b/.gitignore @@ -358,6 +358,7 @@ target/ # https://github.com/github/gitignore/blob/main/Julia.gitignore # Manifest files generated by the package manager Manifest.toml +!pipelines/epiautogp/Manifest.toml # Files generated by invoking Julia with --code-coverage *.jl.cov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b40caa53a..00eec52ad 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: hooks: - id: detect-secrets args: ["--baseline", ".secrets.baseline"] - exclude: package.lock.json + exclude: ^(package\.lock\.json|pipelines/epiautogp/Manifest\.toml)$ # R 
- repo: https://github.com/posit-dev/air-pre-commit rev: 0.9.0 @@ -59,3 +59,4 @@ repos: hooks: - id: typos args: ["--force-exclude"] + exclude: ^pipelines/epiautogp/Manifest\.toml$ diff --git a/Containerfile b/Containerfile index c4b197f29..ff3c787f9 100644 --- a/Containerfile +++ b/Containerfile @@ -25,15 +25,25 @@ ENV UV_PYTHON_CACHE_DIR=/root/.cache/uv/python # R package - hewr COPY ./hewr /cfa-stf-routine-forecasting/hewr -# Julia package - EpiAutoGP -COPY ./EpiAutoGP /cfa-stf-routine-forecasting/EpiAutoGP +# Julia environment for direct NowcastAutoGP runner +# Copy only Julia environment metadata first so dependency installation is cached +# independently of changes to pipeline source files. The full pipelines tree is +# copied later. +COPY ./pipelines/epiautogp/Project.toml \ + ./pipelines/epiautogp/Manifest.toml \ + /cfa-stf-routine-forecasting/pipelines/epiautogp/ # Set working directory WORKDIR /cfa-stf-routine-forecasting -# Cache Julia packages and artifacts -RUN --mount=type=cache,target=/root/.julia \ - julia --project=EpiAutoGP -e 'using Pkg; Pkg.instantiate()' +# Instantiate Julia dependencies into the image so the runtime container can run +# the EpiAutoGP subprocess without downloading packages. This is a script +# environment under pipelines/epiautogp, so we commit its Manifest.toml for a +# reproducible EpiAutoGP dependency set. 
+RUN julia --project=pipelines/epiautogp -e 'using Pkg; Pkg.instantiate()' + + + # Install hewr RUN Rscript -e "install.packages('pak')" diff --git a/EpiAutoGP/Project.toml b/EpiAutoGP/Project.toml deleted file mode 100644 index 5ea375c01..000000000 --- a/EpiAutoGP/Project.toml +++ /dev/null @@ -1,37 +0,0 @@ -name = "EpiAutoGP" -uuid = "c2940010-6b35-4be1-8bbf-9fa0d9979e50" -authors = ["Sam Brand (USI1) "] -version = "0.1.0" - -[deps] -ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" -CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965" -DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1" -JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" -Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" -NowcastAutoGP = "7e9f7f4b-f590-4c14-8324-de4fcbed18f7" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[sources] -NowcastAutoGP = {rev = "v0.3.0", url = "https://github.com/CDCgov/NowcastAutoGP.git"} - -[compat] -ArgParse = "1.2.0" -CSV = "0.10.15" -DBInterface = "2.6.1" -DataFramesMeta = "0.15.4" -Dates = "1.11.0" -DuckDB = "1.5.2" -JSON3 = "1.14.3" -Logging = "1.11.0" -NowcastAutoGP = "0.3.0" -Random = "1.11.0" -Statistics = "1.11.1" -StructTypes = "1.11.0" -Test = "1.11.0" diff --git a/EpiAutoGP/README.md b/EpiAutoGP/README.md deleted file mode 100644 index 3c541559e..000000000 --- a/EpiAutoGP/README.md +++ /dev/null @@ -1,114 +0,0 @@ -# EpiAutoGP - -A Julia package for epidemiological forecasting using Gaussian Process models with automatic kernel discovery and nowcasting capabilities. 
- -## Overview - -Uses [NowcastAutoGP](https://github.com/CDCgov/NowcastAutoGP), it provides an interface for forecasting disease surveillance data with uncertainty quantification via the entrypoint script `run.jl`. - -## Installation - -This package is part of the PyRenew-HEW forecasting pipeline. To use it: - -```bash -cd EpiAutoGP -julia --project=. -e 'using Pkg; Pkg.instantiate()' -``` - -## Quick Start - -### Running from Command Line - -```bash -julia --project=. run.jl \ - --json-input data/input.json \ - --output-dir output/ \ - --n-forecast-weeks 4 \ - --n-forecast-draws 2000 \ - --transformation boxcox -``` - -### Using as a Julia Package - -```julia -using EpiAutoGP -using Dates - -# Load input data -input_data = read_and_validate_data("path/to/input.json") - -# Generate forecasts -results = forecast_with_epiautogp( - input_data; - n_forecast_weeks = 4, - n_forecasts = 2000, - transformation_name = "boxcox" -) - -# Access results -forecast_dates = results.forecast_dates -forecasts = results.forecasts # Matrix: (dates × samples) -``` - -## Input Format - -Input data should be provided as JSON with the following structure: - -```json -{ - "dates": ["2024-01-01", "2024-01-08", "2024-01-15"], - "reports": [100.0, 120.0, 95.0], - "pathogen": "COVID-19", - "location": "CA", - "target": "nhsn", - "forecast_date": "2024-01-15", - "nowcast_dates": ["2024-01-08", "2024-01-15"], - "nowcast_reports": [[115.0, 120.0], [90.0, 95.0]] -} -``` - -The nowcast fields can be empty arrays if no nowcasting is needed/available. 
- -## Command-Line Options - -| Option | Description | Default | -|--------|-------------|---------| -| `--json-input` | Path to input JSON file | *Required* | -| `--output-dir` | Output directory for results | *Required* | -| `--n-forecast-weeks` | Number of weeks to forecast | 8 | -| `--n-forecast-draws` | Total number of forecast samples | 2000 | -| `--transformation` | Data transformation (`boxcox`, `positive`, `percentage`) | `boxcox` | -| `--n-particles` | Number of SMC particles | 24 | -| `--smc-data-proportion` | Proportion of data per SMC step | 0.1 | -| `--n-mcmc` | MCMC samples for kernel structure | 100 | -| `--n-hmc` | HMC samples for hyperparameters | 50 | - -## Output Format - -The model generates Hubverse-compatible forecast files with the following structure: - -- Quantile forecasts at standard probability levels -- Point forecasts (median) -- Forecast horizon in weeks from reference date -- Location and pathogen metadata - -Output file naming: `{forecast_date}-CFA-EpiAutoGP-{location}-{pathogen}-{target}.csv` - -### Forecast Dates - -Forecasts include the reference date (week 0) plus `n_forecast_weeks` ahead, this reflects that commonly the forecast date will be a nowcast date. - -For example, with `n_forecast_weeks=3`: -- Week 0: `forecast_date` -- Week 1: `forecast_date + 1 week` -- Week 2: `forecast_date + 2 weeks` -- Week 3: `forecast_date + 3 weeks` - - -## Testing - -Run the test suite: - -```bash -julia --project=. -e 'using Pkg; Pkg.test()' -``` diff --git a/EpiAutoGP/run.jl b/EpiAutoGP/run.jl deleted file mode 100644 index 47b87cabf..000000000 --- a/EpiAutoGP/run.jl +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env julia - -# Import EpiAutoGP module with all functions -using EpiAutoGP -using Logging - -# Configure logging -logger = SimpleLogger() -global_logger(logger) - -""" -EpiAutoGP model runner for PyRenew-HEW pipeline - -This script serves as the entry point for running EpiAutoGP models in the PyRenew-HEW -forecasting pipeline. 
It accepts JSON input data and produces hubverse-compatible outputs. - -Usage: - julia --project=. run.jl --json-input path/to/input.json --output-dir path/to/output - -Arguments: - --json-input: Path to JSON file containing model input data - --output-dir: Directory for saving model outputs - --n-forecast-weeks: Number of weeks to forecast (default: 8) - --n-particles: Number of particles for SMC (default: 24) - --n-mcmc: Number of MCMC steps for GP kernel structure (default: 100) - --n-hmc: Number of HMC steps for GP kernel hyperparameters (default: 50) - --n-forecast-draws: Number of forecast draws (default: 2000) - --transformation: Data transformation type (default: "boxcox") - --smc-data-proportion: Proportion of data used in each SMC step (default: 0.1) -""" -function main() - """ - Main execution function - """ - return try - @info "Starting EpiAutoGP model run" - - # Parse command line arguments - args = parse_arguments() - @info "Parsed arguments successfully" - @info "Input file: $(args["json-input"])" - @info "Output directory: $(args["output-dir"])" - - # Load and validate input data - @info "Loading input data from JSON file..." - input_data = read_and_validate_data(args["json-input"]) - @info "Successfully loaded data for $(input_data.pathogen) in $(input_data.location)" - @info "Data contains $(length(input_data.dates)) time points" - @info "Forecast date: $(input_data.forecast_date)" - - # Run the EpiAutoGP forecasting model - @info "Running EpiAutoGP forecasting model..." - results = forecast_with_epiautogp(input_data, args) - @info "Model run completed successfully" - @info "Generated forecasts for $(length(results.forecast_dates)) dates" - - # Create hubverse-compatible output - @info "Creating hubverse-compatible forecast output..." 
- output_type = PipelineOutput() # Use default quantile levels - - hubverse_df = create_forecast_output( - input_data, - results, - args["output-dir"], - output_type; - save_output = true - ) - - @info "EpiAutoGP model run completed successfully" - @info "Results saved to $(args["output-dir"])" - - catch e - @error "EpiAutoGP model run failed: $e" - @error "Stack trace:" exception = (e, catch_backtrace()) - rethrow(e) - end -end - -# Run main function if script is executed directly -if abspath(PROGRAM_FILE) == @__FILE__ - main() -end diff --git a/EpiAutoGP/src/EpiAutoGP.jl b/EpiAutoGP/src/EpiAutoGP.jl deleted file mode 100644 index 5bdc7caa6..000000000 --- a/EpiAutoGP/src/EpiAutoGP.jl +++ /dev/null @@ -1,53 +0,0 @@ -module EpiAutoGP -using NowcastAutoGP # Core modeling package -using CSV, DataFramesMeta, Dates, DBInterface, DuckDB, JSON3, StructTypes # Data handling packages -using ArgParse # Command-line argument parsing -using Statistics # For modeling functions - -# Export command line argument parsing -export parse_arguments - -# Export input data structures and functions -export EpiAutoGPInput, - validate_input, - read_data, - read_and_validate_data - -# Export modeling functions -export prepare_for_modelling, - fit_base_model, - forecast_with_epiautogp, - forecast_with_epiautogp - -# Export output functions and types -export AbstractForecastOutput, - AbstractHubverseOutput, - QuantileOutput, - PipelineOutput, - create_forecast_df, - create_forecast_output - -# constants for default pathogen and target abbreviations -const DEFAULT_PATHOGEN_DICT = Dict( - "COVID-19" => "covid", - "Influenza" => "flu", - "RSV" => "rsv" -) -const DEFAULT_TARGET_DICT = Dict( - "nhsn" => "hosp", - "nssp" => "prop ed visits" -) -const DEFAULT_TARGET_LETTER = Dict( - "nhsn" => "h", - "nssp" => "e" -) -const DEFAULT_GROUP_NAME = "CFA" -const DEFAULT_MODEL_NAME = "EpiAutoGP" - -# Include source files -include("parse_arguments.jl") # Function to parse command line arguments 
-include("input.jl") # Functions to load and process input JSON data -include("modelling.jl") # Main modeling and forecasting functions -include("output.jl") # Functions for generating hubverse outputs - -end diff --git a/EpiAutoGP/src/input.jl b/EpiAutoGP/src/input.jl deleted file mode 100644 index 388dc9694..000000000 --- a/EpiAutoGP/src/input.jl +++ /dev/null @@ -1,194 +0,0 @@ -""" - struct EpiAutoGPInput - -A structured input data type for EpiAutoGP epidemiological modeling. - -This struct represents the complete input dataset required for running epidemiological -forecasting models in combination with nowcasting using `NowcastAutoGP.jl`. It combines historical observation data -with nowcasting requirements and forecast parameters. - -# Fields -- `dates::Vector{Date}`: Vector of observation dates in chronological order -- `reports::Vector{Real}`: Vector of case counts/measurements corresponding to each date -- `pathogen::String`: Disease identifier (e.g., "COVID-19", "Influenza", "RSV") -- `location::String`: Geographic location identifier (e.g., "CA", "NY", "US") -- `target::String`: Target data type (e.g., "nssp", "nhsn") -- `frequency::String`: Temporal frequency of data ("daily" or "epiweekly") -- `ed_visit_type::String`: Type of ED visits ("observed" or "other"), only applicable for NSSP target -- `forecast_date::Date`: Reference date from which forecasting begins, often this will be a nowcast date -- `nowcast_dates::Vector{Date}`: Dates requiring nowcasting (typically recent dates with incomplete data) -- `nowcast_reports::Vector{Vector{Real}}`: Uncertainty bounds or samples for nowcast dates - -""" -struct EpiAutoGPInput - dates::Vector{Date} - reports::Vector{Real} - pathogen::String - location::String - target::String - frequency::String - ed_visit_type::String - forecast_date::Date - nowcast_dates::Vector{Date} - nowcast_reports::Vector{Vector{Real}} -end - -# Enable JSON3 serialization -StructTypes.StructType(::Type{EpiAutoGPInput}) = 
StructTypes.Struct() - -""" - function validate_input(data::EpiAutoGPInput) - -Validate an `EpiAutoGPInput` data structure for consistency and correctness. - -Performs comprehensive validation including: -- Array length consistency between dates and reports -- Chronological ordering of dates and nowcast dates -- Non-negative finite values for all reports -- Non-empty string identifiers for pathogen and location -- Reasonable forecast date relative to data range -- Proper structure of nowcast data - -# Arguments -- `data::EpiAutoGPInput`: The input data structure to validate - -# Returns -- `Bool`: Returns `true` if validation passes -""" -function validate_input(data::EpiAutoGPInput; valid_targets = ["nhsn", "nssp"]) - @assert data.target in valid_targets "Target must be one of $(valid_targets), got '$(data.target)'" - # Check array length consistency - if length(data.dates) != length(data.reports) - throw(ArgumentError("Length mismatch: dates ($(length(data.dates))) and reports ($(length(data.reports))) must have the same length")) - end - - # Check for non-empty essential data - if length(data.dates) == 0 - throw(ArgumentError("Empty data: dates and reports cannot be empty")) - end - - # Check date ordering - if !issorted(data.dates) - throw(ArgumentError("Date ordering: dates must be sorted chronologically")) - end - - # Check nowcast data consistency - # If no nowcast dates, should have no nowcast reports (pure forecasting) - if isempty(data.nowcast_dates) && !isempty(data.nowcast_reports) - throw(ArgumentError("Nowcast consistency error: no nowcast_dates provided but nowcast_reports is not empty")) - end - - # If nowcast dates exist, each vector in nowcast_reports should have length equal to number of nowcast_dates - # (each vector represents one realization across all nowcast dates) - if !isempty(data.nowcast_dates) - for (i, report_vec) in enumerate(data.nowcast_reports) - if length(report_vec) != length(data.nowcast_dates) - throw(ArgumentError("Nowcast 
vector length mismatch at index $i: nowcast_reports[$i] has length $(length(report_vec)) but should have length $(length(data.nowcast_dates)) to match nowcast_dates")) - end - end - end - - # Check nowcast date ordering - if !isempty(data.nowcast_dates) && !issorted(data.nowcast_dates) - throw(ArgumentError("Nowcast date ordering: nowcast_dates must be sorted chronologically")) - end - - # Check string identifiers - if isempty(strip(data.pathogen)) - throw(ArgumentError("Invalid pathogen: pathogen cannot be empty or whitespace")) - end - - if isempty(strip(data.location)) - throw(ArgumentError("Invalid location: location cannot be empty or whitespace")) - end - - # Check numerical validity - for (i, report) in enumerate(data.reports) - if !isfinite(report) || report < 0 - throw(ArgumentError("Invalid report value at index $i: reports must be non-negative finite numbers (got $report)")) - end - end - - # Check nowcast reports validity - for (i, report_vec) in enumerate(data.nowcast_reports) - for (j, report) in enumerate(report_vec) - if !isfinite(report) || report < 0 - throw(ArgumentError("Invalid nowcast report value at index [$i][$j]: must be non-negative finite number (got $report)")) - end - end - end - - # Check forecast date reasonableness - if !isempty(data.dates) - date_range = maximum(data.dates) - minimum(data.dates) - days_buffer = max(30, Int(ceil(date_range.value / 10))) - - if data.forecast_date < minimum(data.dates) - Day(days_buffer) - throw(ArgumentError("Forecast date ($(data.forecast_date)) is too far before the data range ($(minimum(data.dates)) to $(maximum(data.dates)))")) - end - - if data.forecast_date > maximum(data.dates) + Day(days_buffer) - throw(ArgumentError("Forecast date ($(data.forecast_date)) is too far after the data range ($(minimum(data.dates)) to $(maximum(data.dates)))")) - end - end - - return true -end - -""" - function read_data(path_to_json::String) - -Read and parse epidemiological input data from a JSON file. 
- -This function reads a JSON file and deserializes it into an `EpiAutoGPInput` struct -using JSON3.jl. The JSON file should contain all required fields matching the -struct definition. - -# Arguments -- `path_to_json::String`: Path to the JSON file containing input data - -# Returns -- `EpiAutoGPInput`: Parsed data structure ready for model input - -# Throws -- `SystemError`: If the file cannot be read (e.g., file not found) -- `JSON3.StructuralError`: If JSON structure doesn't match expected format -- `ArgumentError`: If date parsing fails or data types are incompatible - -# Examples -```julia -# Read data from a JSON file -data = read_data("path/to/input_data.json") - -# The JSON file should have structure like: -# { -# "dates": ["2024-01-01", "2024-01-02"], -# "reports": [45.0, 52.0], -# "pathogen": "COVID-19", -# "location": "CA", -# "forecast_date": "2024-01-02", -# "nowcast_dates": [], -# "nowcast_reports": [] -# } -``` - -!!! note - This function does not validate the data. Use [`read_and_validate_data`](@ref) - for automatic validation, or call [`validate_input`](@ref) separately. -""" -function read_data(path_to_json::String) - json_string = read(path_to_json, String) - data = JSON3.read(json_string, EpiAutoGPInput) - return data -end - -""" - read_and_validate_data(path_to_json::String) -> EpiAutoGPInput - -Read epidemiological data from JSON file with automatic validation. -""" -function read_and_validate_data(path_to_json::String) - data = read_data(path_to_json) - validate_input(data) - return data -end diff --git a/EpiAutoGP/src/modelling.jl b/EpiAutoGP/src/modelling.jl deleted file mode 100644 index 9be477bc5..000000000 --- a/EpiAutoGP/src/modelling.jl +++ /dev/null @@ -1,248 +0,0 @@ -""" - prepare_for_modelling(input::EpiAutoGPInput, transformation_name::String, n_ahead::Int, n_forecasts::Int) -> NamedTuple - -Prepare all data and configuration needed for NowcastAutoGP modeling. 
- -This function extracts stable training data (excluding nowcast dates), sets up data transformations, -formats nowcast data for the modeling pipeline, and calculates forecast dates and sample sizes. - -# Arguments -- `input::EpiAutoGPInput`: The input data structure containing dates, reports, and nowcast information -- `transformation_name::String`: Name of transformation to apply ("boxcox", "positive", "percentage") -- `n_ahead::Int`: Number of time steps (days or epiweeks) to forecast into the future -- `n_forecasts::Int`: Total number of forecast samples desired - -# Returns -A NamedTuple containing: -- `stable_data_dates::Vector{Date}`: Dates for confirmed/stable data (excluding nowcast dates) -- `stable_data_values::Vector{<:Real}`: Values for confirmed/stable data -- `nowcast_data`: Formatted nowcast data for NowcastAutoGP (empty if no nowcasts) -- `forecast_dates::Vector{Date}`: Dates for which forecasts will be generated -- `n_forecasts_per_nowcast::Int`: Number of forecast samples per nowcast scenario -- `transformation::Function`: Forward transformation function -- `inv_transformation::Function`: Inverse transformation function -""" -function prepare_for_modelling( - input::EpiAutoGPInput, transformation_name::String, - n_ahead::Int, n_forecasts::Int - ) - # Extract stable confirmed data, excluding recent uncertain dates with nowcasts - stable_data_idxs = findall(d -> !(d in input.nowcast_dates), input.dates) - stable_data_dates = input.dates[stable_data_idxs] - stable_data_values = input.reports[stable_data_idxs] - - # Get transformation functions - # Convert reports to Float64 to ensure concrete type for get_transformations - transformation, - inv_transformation = get_transformations(transformation_name, Float64.(input.reports)) - - # Format nowcast data (only if nowcasts exist) - nowcast_data = isempty(input.nowcast_dates) ? 
- # Return nothing when no nowcasts - nothing : - # Create nowcast data structure when nowcasts exist - create_nowcast_data(input.nowcast_reports, input.nowcast_dates; transformation) - - # Calculate forecasting dates (starting from forecast_date and going n_ahead time steps forward) - # Use Day or Week based on frequency - time_step = input.frequency == "epiweekly" ? Week(1) : Day(1) - forecast_dates = [input.forecast_date + i * time_step for i in 0:n_ahead] - - # Calculate number of forecasts per nowcast sample - n_forecasts_per_nowcast = isnothing(nowcast_data) ? - n_forecasts : - max(1, n_forecasts ÷ length(nowcast_data)) - - return (; - stable_data_dates, stable_data_values, nowcast_data, forecast_dates, - n_forecasts_per_nowcast, transformation, inv_transformation, - ) -end - -""" - fit_base_model(dates::Vector{Date}, values::Vector{<:Real}; - transformation::Function, - n_particles::Int=24, - smc_data_proportion::Float64=0.1, - n_mcmc::Int=50, - n_hmc::Int=50) -> AutoGP.Model - -Fit a base Gaussian Process model using NowcastAutoGP on confirmed/stable data. - -This function creates transformed data and fits a GP model using Sequential Monte Carlo (SMC) -sampling. The model will be used as the foundation for forecasting, either directly or in -combination with nowcast scenarios. 
- -# Arguments -- `dates::Vector{Date}`: Vector of observation dates in chronological order -- `values::Vector{<:Real}`: Vector of corresponding observation values -- `transformation::Function`: Data transformation function (from get_transformations) -- `n_particles::Int=24`: Number of SMC particles for model fitting -- `smc_data_proportion::Float64=0.1`: Proportion of data used in each SMC step -- `n_mcmc::Int=50`: Number of MCMC samples for structure exploration -- `n_hmc::Int=50`: Number of HMC samples for parameter updates - -# Returns -- Fitted AutoGP model ready for forecasting -""" -function fit_base_model( - dates::Vector{Date}, values::Vector{<:Real}; - transformation::Function, - n_particles::Int = 24, - smc_data_proportion::Float64 = 0.1, - n_mcmc::Int = 50, - n_hmc::Int = 50 - ) - - # Create transformed data - transformed_data = create_transformed_data(dates, values; transformation) - - # Fit the model - model = make_and_fit_model( - transformed_data; - n_particles = n_particles, - smc_data_proportion = smc_data_proportion, - n_mcmc = n_mcmc, - n_hmc = n_hmc - ) - - return model -end - -""" - function _do_forecasts(nowcast_data, base_model::AutoGP.Model, - forecast_dates, n_forecasts_per - -Internal function to handle forecasting with or without nowcast data by dispatching on nowcast_data type. 
-""" -function _do_forecasts( - nowcast_data, base_model, forecast_dates, - n_forecasts_per_nowcast::Int; inv_transformation::Function - ) - return forecast_with_nowcasts( - base_model, nowcast_data, forecast_dates, - n_forecasts_per_nowcast; - inv_transformation = inv_transformation - ) -end - -function _do_forecasts( - nowcast_data::Nothing, base_model, forecast_dates, - n_forecasts_per_nowcast::Int; inv_transformation::Function - ) - return forecast( - base_model, forecast_dates, - n_forecasts_per_nowcast; - inv_transformation = inv_transformation - ) -end - -""" - forecast_with_epiautogp(input::EpiAutoGPInput; - n_ahead::Int=8, - n_forecasts::Int=20, - transformation_name::String="boxcox", - n_particles::Int=24, - smc_data_proportion::Float64=0.1, - n_mcmc::Int=50, - n_hmc::Int=50) -> NamedTuple - -Main forecasting function that combines EpiAutoGP input with NowcastAutoGP modeling. - -This function implements the complete nowcasting and forecasting workflow: -1. Prepares stable training data and nowcast scenarios from EpiAutoGPInput -2. Fits a base GP model on confirmed data -3. 
Generates forecasts either directly (if no nowcasts) or incorporating nowcast uncertainty - -# Arguments -- `input::EpiAutoGPInput`: The input data structure with dates, reports, and nowcast information -- `n_ahead::Int=8`: Number of time steps (days or epiweeks) to forecast ahead from forecast_date -- `n_forecasts::Int=20`: Total number of forecast samples to generate -- `transformation_name::String="boxcox"`: Data transformation type ("boxcox", "positive", "percentage") -- `n_particles::Int=24`: Number of SMC particles for GP model fitting -- `smc_data_proportion::Float64=0.1`: Proportion of data used in each SMC step -- `n_mcmc::Int=50`: Number of MCMC samples for GP structure exploration -- `n_hmc::Int=50`: Number of HMC samples for GP parameter updates - -# Returns -A NamedTuple containing: -- `forecast_dates::Vector{Date}`: Dates for which forecasts were generated -- `forecasts::Matrix`: Forecast samples matrix (dates × samples) -- `forecast_date::Date`: The reference date for forecasting (from input.forecast_date) -- `location::String`: The location identifier (from input.location) -- `disease::String`: The disease name (from input.disease) -""" -function forecast_with_epiautogp( - input::EpiAutoGPInput; - n_ahead::Int = 8, - n_forecasts::Int = 20, - transformation_name::String = "boxcox", - n_particles::Int = 24, - smc_data_proportion::Float64 = 0.1, - n_mcmc::Int = 50, - n_hmc::Int = 50 - ) - - # Prepare training data, nowcasting data and forecasting dates - model_info = prepare_for_modelling(input, transformation_name, n_ahead, n_forecasts) - - # Fit base model on confirmed/stable data - base_model = fit_base_model( - model_info.stable_data_dates, model_info.stable_data_values; - transformation = model_info.transformation, - n_particles = n_particles, - smc_data_proportion = smc_data_proportion, - n_mcmc = n_mcmc, - n_hmc = n_hmc - ) - - # Generate forecasts - # Dispatches to use nowcast data if available - forecasts = _do_forecasts( - 
model_info.nowcast_data, base_model, - model_info.forecast_dates, model_info.n_forecasts_per_nowcast; - inv_transformation = model_info.inv_transformation - ) - - return (; - forecast_dates = model_info.forecast_dates, - forecasts = forecasts, - ) -end - -""" - forecast_with_epiautogp(input::EpiAutoGPInput, args::Dict{String, Any}) -> NamedTuple - -Run the complete EpiAutoGP modeling pipeline using parsed command-line arguments. - -This is the main entry point for command-line usage that combines EpiAutoGPInput data -with parsed command-line arguments to execute the full nowcasting and forecasting workflow. - -# Arguments -- `input::EpiAutoGPInput`: The input data structure with epidemiological time series -- `args::Dict{String, Any}`: Parsed command-line arguments from parse_arguments() - -# Returns -- Same as forecast_with_epiautogp(): NamedTuple with forecast results and metadata - -# Expected command-line arguments -- `"n-ahead"`: Number of time steps (days or epiweeks) to forecast -- `"n-forecast-draws"`: Total number of forecast samples -- `"transformation"`: Data transformation type -- `"n-particles"`: Number of SMC particles -- `"smc-data-proportion"`: SMC data proportion -- `"n-mcmc"`: Number of MCMC samples -- `"n-hmc"`: Number of HMC samples -""" -function forecast_with_epiautogp(input::EpiAutoGPInput, args::Dict{String, Any}) - return forecast_with_epiautogp( - input; - n_ahead = args["n-ahead"], - n_forecasts = args["n-forecast-draws"], - transformation_name = args["transformation"], - n_particles = args["n-particles"], - smc_data_proportion = args["smc-data-proportion"], - n_mcmc = args["n-mcmc"], - n_hmc = args["n-hmc"] - ) -end diff --git a/EpiAutoGP/src/output.jl b/EpiAutoGP/src/output.jl deleted file mode 100644 index 87bbef341..000000000 --- a/EpiAutoGP/src/output.jl +++ /dev/null @@ -1,267 +0,0 @@ -""" - AbstractForecastOutput - -Abstract base type for all forecast output formats in EpiAutoGP. 
-""" -abstract type AbstractForecastOutput end - -""" - AbstractHubverseOutput <: AbstractForecastOutput - -Abstract type for hubverse-compatible forecast outputs in CSV format. -""" -abstract type AbstractHubverseOutput <: AbstractForecastOutput end - -""" - PipelineOutput <: AbstractForecastOutput - -Abstract type for directly outputting forecasts as typical pipeline outputs for - `cfa-stf-routine-forecasting`. -""" -struct PipelineOutput <: AbstractForecastOutput end - -""" - QuantileOutput <: AbstractHubverseOutput - -Configuration for quantile-based forecast outputs compatible with hubverse specifications. -""" -@kwdef struct QuantileOutput <: AbstractHubverseOutput - quantile_levels::Vector{Float64} = [ - 0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, - 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99, - ] -end - -""" - _make_horizon_col(target_end_dates::Vector{Date}, reference_date::Date) -> Vector{Int} - -Calculate forecast horizons in weeks from reference date to target dates. - -This internal helper function computes the horizon column required for hubverse -forecast tables. Horizons represent the number of weeks between the reference -date (when the forecast was made) and each target date. - -# Arguments -- `target_end_dates::Vector{Date}`: Vector of forecast target dates -- `reference_date::Date`: Reference date for the forecast (forecast creation date) - -# Returns -- `Vector{Int}`: Vector of horizons in weeks (integer division by 7 days) -""" -function _make_horizon_col(target_end_dates::Vector{Date}, reference_date::Date) - return [Dates.value(d - reference_date) ÷ 7 for d in target_end_dates] -end - -""" - create_forecast_df(results::NamedTuple, output_type::QuantileOutput) -> DataFrame - -Convert EpiAutoGP forecast results to a basic DataFrame with quantile summaries. - -This function processes raw forecast samples from the EpiAutoGP model and computes -quantile summaries for each forecast date. 
The resulting DataFrame contains the -core forecast data needed for hubverse tables. - -# Arguments -- `results::NamedTuple`: Model results containing `forecast_dates` and `forecasts` - - `forecast_dates::Vector{Date}`: Dates for which forecasts were generated - - `forecasts::Matrix`: Forecast samples (dates × samples) -- `output_type::QuantileOutput`: Configuration specifying which quantiles to compute - -# Returns -- `DataFrame`: Basic forecast DataFrame with columns: - - `output_type_id`: Quantile level (e.g., 0.5 for median) - - `value`: Computed quantile value - - `target_end_date`: Date for which the forecast applies - - `output_type`: Always "quantile" for this method -""" -function create_forecast_df(results::NamedTuple, output_type::QuantileOutput) - # Extract relevant data - forecast_dates = results.forecast_dates - forecasts = results.forecasts - # Create a DataFrame with columns: output_type_id, value, target_end_date, output_type - forecast_df = DataFrame(output_type_id = Float64[], value = Float64[], target_end_date = Date[]) - # Populate the DataFrame row by row - for (date_idx, target_end_date) in enumerate(forecast_dates) - date_samples = forecasts[date_idx, :] - for q_level in output_type.quantile_levels - q_value = quantile(date_samples, q_level) - push!( - forecast_df, - ( - output_type_id = q_level, - value = q_value, - target_end_date = target_end_date, - ) - ) - end - end - # Add constant column for output_type, this method is specifically for quantiles - forecast_df[!, "output_type"] .= "quantile" - return forecast_df -end - -function create_forecast_df(results::NamedTuple, output_type::PipelineOutput) - # Extract relevant data - forecast_dates = results.forecast_dates - forecasts = results.forecasts - - # Create a DataFrame with columns: date, .value, .draw - forecast_df = mapreduce(vcat, enumerate(eachcol(forecasts))) do (draw, sampled_values) - DataFrame( - :date => forecast_dates, - Symbol(".value") => sampled_values, - Symbol(".draw") 
=> fill(Int32(draw), length(sampled_values)) - ) - end - - return forecast_df -end - -function _quote_duckdb_string(value::AbstractString) - return "'" * replace(value, "'" => "''") * "'" -end - -function _write_parquet_with_duckdb(path::AbstractString, table) - con = DBInterface.connect(DuckDB.DB, ":memory:") - return try - DuckDB.register_data_frame(con, table, "forecast_samples") - DBInterface.execute( - con, - "COPY forecast_samples TO $(_quote_duckdb_string(path)) (FORMAT parquet)" - ) - finally - DBInterface.close(con) - end -end - - -""" - create_forecast_output(input, results, output_dir, output_type; kwargs...) -> DataFrame - -Create complete hubverse-compatible forecast table from EpiAutoGP results. - -This is the main function for generating hubverse forecast outputs. It combines -forecast results with metadata from the input to create a fully compliant -hubverse table, optionally saving it to disk. - -# Arguments -- `input::EpiAutoGPInput`: Original input data containing metadata -- `results::NamedTuple`: Model forecast results with `forecast_dates` and `forecasts` -- `output_dir::String`: Directory path for saving output files -- `output_type::AbstractHubverseOutput`: Output format configuration - -# Keyword Arguments -- `save_output::Bool`: Whether to save the table to a CSV file -- `disease_abbr::Dict{String, String}`: Disease name abbreviations (default: DEFAULT_PATHOGEN_DICT) -- `target_abbr::Dict{String, String}`: Target type abbreviations (default: DEFAULT_TARGET_DICT) -- `group_name::String`: Forecasting group identifier (default: DEFAULT_GROUP_NAME) -- `model_name::String`: Model identifier (default: DEFAULT_MODEL_NAME) - -# Returns -- `DataFrame`: Complete hubverse-compatible forecast table with columns: - - `output_type`: Type of forecast output ("quantile") - - `output_type_id`: Quantile level or other output identifier - - `value`: Forecast value - - `reference_date`: Date when forecast was made - - `target`: Target description (e.g., "wk 
inc covid hosp") - - `horizon`: Forecast horizon in weeks - - `target_end_date`: Date for which forecast applies - - `location`: Geographic location identifier -""" -function create_forecast_output( - input::EpiAutoGPInput, - results::NamedTuple, - output_dir::String, - output_type::AbstractHubverseOutput; - save_output::Bool, - disease_abbr::Dict{String, String} = DEFAULT_PATHOGEN_DICT, - target_abbr::Dict{String, String} = DEFAULT_TARGET_DICT, - group_name::String = DEFAULT_GROUP_NAME, - model_name::String = DEFAULT_MODEL_NAME - ) - # Extract relevant data - forecast_date = input.forecast_date - location = input.location - pathogen = input.pathogen - target = input.target - target_col_string = "wk inc $(disease_abbr[pathogen]) $(target_abbr[target])" - - # Create basic forecast DataFrame - forecast_df = create_forecast_df(results, output_type) - - # Add additional required columns - forecast_df[!, "reference_date"] .= forecast_date - forecast_df[!, "location"] .= location - forecast_df[!, "target"] .= target_col_string - - # Add horizon column - @transform!(forecast_df, :horizon = _make_horizon_col(:target_end_date, forecast_date)) - # Reorder columns and check all required columns are present - @select!( - forecast_df, - :output_type, :output_type_id, :value, :reference_date, :target, :horizon, :target_end_date, - :location - ) - - # Save as CSV to match expected format - if save_output - outputfilename = "$(string(forecast_date))-$(group_name)-$(model_name)-$(location)-$(disease_abbr[pathogen])-$(target).csv" - csv_path = joinpath(output_dir, outputfilename) - mkpath(dirname(csv_path)) - CSV.write(csv_path, forecast_df) - - @info "Saved hubverse forecast table to $csv_path" - end - - return forecast_df -end - -function create_forecast_output( - input::EpiAutoGPInput, - results::NamedTuple, - output_dir::String, - output_type::PipelineOutput; - save_output::Bool, - disease_abbr::Dict{String, String} = DEFAULT_PATHOGEN_DICT, - target_abbr::Dict{String, String} = 
DEFAULT_TARGET_DICT, - group_name::String = DEFAULT_GROUP_NAME, - model_name::String = DEFAULT_MODEL_NAME - ) - # Create basic forecast DataFrame with date, .draw, .value - forecast_df = create_forecast_df(results, output_type) - - # Determine variable name based on target and whether using percentages - variable_name = if input.target == "nhsn" - "observed_hospital_admissions" - else - Dict( - "observed" => "observed_ed_visits", - "other" => "other_ed_visits", - "pct" => "prop_disease_ed_visits" - )[input.ed_visit_type] - end - - # Input is in percentage format (0-100); convert to proportion (0-1) as R expects proportions for prop_ variables - if input.ed_visit_type == "pct" && input.target == "nssp" - forecast_df[!, Symbol(".value")] = forecast_df[!, Symbol(".value")] ./ 100.0 - end - - # Add .variable and resolution columns - forecast_df[!, Symbol(".variable")] .= variable_name - forecast_df[!, :resolution] .= input.frequency - - # Add metadata columns for hubverse compatibility - forecast_df[!, :geo_value] .= input.location - forecast_df[!, :disease] .= input.pathogen - - # Save as parquet if requested - if save_output - parquet_path = joinpath(output_dir, "samples.parquet") - mkpath(dirname(parquet_path)) - _write_parquet_with_duckdb(parquet_path, forecast_df) - - @info "Saved pipeline forecast samples to $parquet_path" - end - - return forecast_df -end diff --git a/EpiAutoGP/src/parse_arguments.jl b/EpiAutoGP/src/parse_arguments.jl deleted file mode 100644 index 7907ea5d7..000000000 --- a/EpiAutoGP/src/parse_arguments.jl +++ /dev/null @@ -1,65 +0,0 @@ -""" - parse_arguments() - -Parses command-line arguments for the EpiAutoGP model. - -# Arguments - -- `--json-input::String` (required): Path to JSON file containing model input data. -- `--output-dir::String` (required): Directory for saving model outputs. -- `--n-ahead::Int` (default: 8): Number of time steps (days or epiweeks) to forecast. -- `--n-particles::Int` (default: 24): Number of particles for SMC. 
-- `--n-mcmc::Int` (default: 100): Number of MCMC steps for GP kernel structure. -- `--n-hmc::Int` (default: 50): Number of HMC steps for GP kernel hyperparameters. -- `--n-forecast-draws::Int` (default: 2000): Number of forecast draws. -- `--transformation::String` (default: "boxcox"): Data transformation type ("boxcox", "positive", "percentage"). -- `--smc-data-proportion::Float64` (default: 0.1): Proportion of data used in each SMC step. - -# Returns - -A dictionary containing the parsed command-line arguments. -""" -function parse_arguments() - s = ArgParseSettings() - - @add_arg_table! s begin - "--json-input" - help = "Path to JSON file containing model input data" - arg_type = String - required = true - "--output-dir" - help = "Directory for saving model outputs" - arg_type = String - required = true - "--n-ahead" - help = "Number of time steps (days or epiweeks) to forecast" - arg_type = Int - default = 8 - "--n-particles" - help = "Number of particles for SMC" - arg_type = Int - default = 24 - "--n-mcmc" - help = "Number of MCMC steps for GP kernel structure" - arg_type = Int - default = 100 - "--n-hmc" - help = "Number of HMC steps for GP kernel hyperparameters" - arg_type = Int - default = 50 - "--n-forecast-draws" - help = "Number of forecast draws" - arg_type = Int - default = 2000 - "--transformation" - help = "Data transformation type (boxcox, positive, percentage)" - arg_type = String - default = "boxcox" - "--smc-data-proportion" - help = "Proportion of data used in each SMC step" - arg_type = Float64 - default = 0.1 - end - - return parse_args(s) -end diff --git a/EpiAutoGP/test/runtests.jl b/EpiAutoGP/test/runtests.jl deleted file mode 100644 index 1edf48e18..000000000 --- a/EpiAutoGP/test/runtests.jl +++ /dev/null @@ -1,19 +0,0 @@ -using Test, EpiAutoGP -using JSON3 -using ArgParse -using Dates -using DBInterface -using DuckDB -using CSV -using DataFramesMeta -using Random -using Statistics -using NowcastAutoGP - -# Run all tests in the test 
directory -include("test_parse_arguments.jl") -include("test_input.jl") -include("test_modelling.jl") -include("test_output.jl") - -println("All EpiAutoGP tests completed!") diff --git a/EpiAutoGP/test/test_input.jl b/EpiAutoGP/test/test_input.jl deleted file mode 100644 index a9108a2b0..000000000 --- a/EpiAutoGP/test/test_input.jl +++ /dev/null @@ -1,156 +0,0 @@ -function create_sample_input( - output_path::String; n_weeks::Int = 30, - pathogen::String = "COVID-19", location::String = "CA" - ) - start_date = Date("2024-01-01") - dates = [start_date + Week(i) for i in 0:(n_weeks - 1)] - reports = [rand(20:100) + 10 * sin(2π * i / 7) + rand() * 5 for i in 1:n_weeks] # Weekly pattern with noise - - forecast_date = dates[end] - nowcast_dates = dates[max(1, end - 2):end] # Last 3 days - nowcast_reports = [ - [reports[max(1, end - 2) + j - 1] + rand(-5:5) for j in 1:3] - for _ in 1:10 - ] # 10 realizations, each with 3 values - - input_data = EpiAutoGPInput( - dates, reports, pathogen, location, "nhsn", "epiweekly", "observed", - forecast_date, nowcast_dates, nowcast_reports - ) - - # Write to JSON file - open(output_path, "w") do f - JSON3.write(f, input_data) - end - - return input_data -end - -@testset "EpiAutoGPInput Tests" begin - @testset "Construction and Serialization" begin - # Test valid construction - dates = [Date("2024-01-01"), Date("2024-01-02"), Date("2024-01-03")] - reports = [45.0, 52.0, 38.0] - nowcast_reports = [[50.0, 36.0], [52.0, 38.0]] - - input_data = EpiAutoGPInput( - dates, reports, "COVID-19", "CA", "nhsn", "daily", "observed", - Date("2024-01-03"), dates[2:3], nowcast_reports - ) - - @test input_data.dates == dates - @test input_data.pathogen == "COVID-19" - @test length(input_data.nowcast_reports) == 2 - - # Test JSON round-trip - json_string = JSON3.write(input_data) - parsed = JSON3.read(json_string, EpiAutoGPInput) - @test parsed.dates == input_data.dates - @test parsed.pathogen == input_data.pathogen - end - - @testset "Data Validation - 
Valid Cases" begin - valid_data = EpiAutoGPInput( - [Date("2024-01-01"), Date("2024-01-02")], - [45.0, 52.0], - "COVID-19", "CA", "nhsn", "daily", "observed", - Date("2024-01-02"), - [Date("2024-01-02")], - [[50.0], [52.0]] - ) - @test validate_input(valid_data) == true - - # Test without nowcasts - no_nowcast = EpiAutoGPInput( - [Date("2024-01-01")], [10.0], "COVID-19", "TX", "nhsn", "daily", "observed", - Date("2024-01-01"), Date[], Vector{Real}[] - ) - @test validate_input(no_nowcast) == true - end - - @testset "Data Validation - Invalid Cases" begin - # Mismatched lengths - @test_throws ArgumentError validate_input( - EpiAutoGPInput( - [Date("2024-01-01"), Date("2024-01-02")], [45.0], - "COVID-19", "CA", "nhsn", "daily", "observed", Date("2024-01-01"), Date[], Vector{Real}[] - ) - ) - - # Empty data - @test_throws ArgumentError validate_input( - EpiAutoGPInput( - Date[], Real[], "COVID-19", "CA", "nhsn", "daily", "observed", Date("2024-01-01"), Date[], Vector{Real}[] - ) - ) - - # Unsorted dates - @test_throws ArgumentError validate_input( - EpiAutoGPInput( - [Date("2024-01-02"), Date("2024-01-01")], [45.0, 52.0], - "COVID-19", "CA", "nhsn", "daily", "observed", Date("2024-01-02"), Date[], Vector{Real}[] - ) - ) - - # Invalid nowcast structure - @test_throws ArgumentError validate_input( - EpiAutoGPInput( - [Date("2024-01-01")], [45.0], "COVID-19", "CA", "nhsn", "daily", "observed", Date("2024-01-01"), - [Date("2024-01-01")], [[50.0, 55.0]] # Wrong length - ) - ) - - # Negative values - @test_throws ArgumentError validate_input( - EpiAutoGPInput( - [Date("2024-01-01")], [-5.0], "COVID-19", "CA", "nhsn", "daily", "observed", Date("2024-01-01"), Date[], Vector{Real}[] - ) - ) - end - - @testset "File I/O" begin - tmpfile = tempname() * ".json" - try - test_data = Dict( - "dates" => ["2024-01-01", "2024-01-02"], - "reports" => [45.0, 52.0], - "pathogen" => "COVID-19", - "location" => "CA", - "target" => "nhsn", - "frequency" => "daily", - "ed_visit_type" => 
"observed", - "forecast_date" => "2024-01-02", - "nowcast_dates" => ["2024-01-02"], - "nowcast_reports" => [[50.0], [55.0]] - ) - - open(tmpfile, "w") do f - JSON3.write(f, test_data) - end - - loaded = read_and_validate_data(tmpfile) - @test loaded.pathogen == "COVID-19" - @test length(loaded.dates) == 2 - finally - isfile(tmpfile) && rm(tmpfile) - end - end - - @testset "Sample Creation" begin - tmpdir = mktempdir() - try - json_path = joinpath(tmpdir, "sample.json") - sample = create_sample_input(json_path; n_weeks = 14, pathogen = "Influenza") - - @test validate_input(sample) == true - @test sample.pathogen == "Influenza" - @test length(sample.dates) == 14 - @test isfile(json_path) - - loaded = read_and_validate_data(json_path) - @test loaded.pathogen == sample.pathogen - finally - rm(tmpdir, recursive = true) - end - end -end diff --git a/EpiAutoGP/test/test_modelling.jl b/EpiAutoGP/test/test_modelling.jl deleted file mode 100644 index e40c54f27..000000000 --- a/EpiAutoGP/test/test_modelling.jl +++ /dev/null @@ -1,101 +0,0 @@ -@testset "Modelling Functions Tests" begin - - # Helper function to create test input data - function create_test_input(; include_nowcasts = true) - dates = [Date(2024, 1, 1) + Week(i - 1) for i in 1:10] - reports = Float64[1000, 1100, 1050, 1150, 1200, 1250, 1300, 1350, 1400, 1450] - - if include_nowcasts - nowcast_dates = dates[(end - 1):end] - nowcast_reports = [Float64[1350, 1400], Float64[1400, 1450]] - else - nowcast_dates = Date[] - nowcast_reports = Vector{Float64}[] - end - - return EpiAutoGPInput( - dates, reports, "COVID-19", "US", "nhsn", "epiweekly", "observed", - dates[end], nowcast_dates, nowcast_reports - ) - end - - @testset "prepare_for_modelling" begin - # Test with nowcasts - input_with_nowcasts = create_test_input(include_nowcasts = true) - result = prepare_for_modelling(input_with_nowcasts, "boxcox", 4, 100) - - @test haskey(result, :stable_data_dates) - @test haskey(result, :transformation) - @test 
length(result.stable_data_dates) == 8 # 10 total - 2 nowcast - @test length(result.forecast_dates) == 5 # 0, 1, 2, 3, 4 weeks ahead - @test result.forecast_dates[1] == input_with_nowcasts.forecast_date # Starts at week 0 - @test ~isnothing(result.nowcast_data) # Should have nowcast data - - # Test without nowcasts - input_no_nowcasts = create_test_input(include_nowcasts = false) - result_no_nowcast = prepare_for_modelling(input_no_nowcasts, "boxcox", 4, 100) - - @test haskey(result_no_nowcast, :nowcast_data) - @test isnothing(result_no_nowcast.nowcast_data) # Should be nothing when no nowcasts - @test length(result_no_nowcast.stable_data_dates) == 10 # All data is stable - end - - @testset "fit_base_model" begin - input = create_test_input(include_nowcasts = true) - prep_result = prepare_for_modelling(input, "positive", 2, 50) - - model = fit_base_model( - prep_result.stable_data_dates, prep_result.stable_data_values; - transformation = prep_result.transformation, - n_particles = 1, smc_data_proportion = 0.5, n_mcmc = 3, n_hmc = 3 - ) - - @test model !== nothing - end - - @testset "forecast_with_epiautogp" begin - input = create_test_input(include_nowcasts = false) - - forecast_dates, - forecasts = forecast_with_epiautogp( - input; - n_ahead = 2, n_forecasts = 10, - transformation_name = "positive", - n_particles = 1, smc_data_proportion = 0.5, n_mcmc = 3, n_hmc = 3 - ) - - @test length(forecast_dates) == 3 # 0, 1, 2 weeks ahead - @test size(forecasts, 1) == 3 - @test size(forecasts, 2) == 10 - @test all(forecasts .> 0) - end - - @testset "forecast_with_epiautogp - daily frequency" begin - # Create daily frequency test data - dates = [Date(2024, 1, 1) + Day(i - 1) for i in 1:30] - reports = Float64[100 + 10 * sin(i / 5) + 2 * i for i in 1:30] - - input = EpiAutoGPInput( - dates, reports, "COVID-19", "US", "nhsn", "daily", "observed", - dates[end], Date[], Vector{Float64}[] - ) - - forecast_dates, - forecasts = forecast_with_epiautogp( - input; - n_ahead = 7, 
n_forecasts = 10, - transformation_name = "positive", - n_particles = 1, smc_data_proportion = 0.5, n_mcmc = 3, n_hmc = 3 - ) - - @test length(forecast_dates) == 8 # 0, 1, 2, ..., 7 days ahead - @test size(forecasts, 1) == 8 - @test size(forecasts, 2) == 10 - @test all(forecasts .> 0) - - # Verify forecast dates increment by Day(1) - @test forecast_dates[1] == dates[end] - @test forecast_dates[2] == dates[end] + Day(1) - @test forecast_dates[end] == dates[end] + Day(7) - end -end diff --git a/EpiAutoGP/test/test_output.jl b/EpiAutoGP/test/test_output.jl deleted file mode 100644 index c48c8bfde..000000000 --- a/EpiAutoGP/test/test_output.jl +++ /dev/null @@ -1,150 +0,0 @@ -@testset "Output Types and Structures Tests" begin - @testset "QuantileOutput Construction" begin - default_output = QuantileOutput() - @test length(default_output.quantile_levels) == 23 - @test 0.5 in default_output.quantile_levels - @test issorted(default_output.quantile_levels) - - custom_output = QuantileOutput(quantile_levels = [0.25, 0.5, 0.75]) - @test custom_output.quantile_levels == [0.25, 0.5, 0.75] - end -end - -@testset "create_forecast_df Function Tests" begin - @testset "Basic Functionality" begin - forecast_dates = [Date("2024-01-01"), Date("2024-01-02")] - forecasts = rand(2, 100) .* 50 .+ 25 - output_type = QuantileOutput(quantile_levels = [0.25, 0.5, 0.75]) - - result_df = create_forecast_df( - (forecast_dates = forecast_dates, forecasts = forecasts), output_type - ) - - @test isa(result_df, DataFrame) - @test size(result_df, 1) == 6 # 2 dates × 3 quantiles - @test all(result_df.output_type .== "quantile") - @test Set(unique(result_df.output_type_id)) == Set([0.25, 0.5, 0.75]) - - # Test quantile ordering - for date_obj in forecast_dates - date_rows = result_df[result_df.target_end_date .== date_obj, :] - q25 = date_rows[date_rows.output_type_id .== 0.25, :].value[1] - q50 = date_rows[date_rows.output_type_id .== 0.5, :].value[1] - q75 = date_rows[date_rows.output_type_id .== 
0.75, :].value[1] - @test q25 <= q50 <= q75 - end - end -end - -@testset "create_forecast_output Function Tests" begin - @testset "End-to-end Functionality" begin - # Create input data - input = EpiAutoGPInput( - [Date("2024-01-01")], - [100.0], - "COVID-19", - "CA", - "nhsn", - "epiweekly", - "observed", - Date("2024-01-01"), - Date[], - Vector{Real}[] - ) - - # Create forecast results - forecast_dates = [Date("2024-01-08"), Date("2024-01-15")] - forecasts = rand(2, 50) .* 100 .+ 50 - results = (forecast_dates = forecast_dates, forecasts = forecasts) - - output_type = QuantileOutput(quantile_levels = [0.5]) - - tmpdir = mktempdir() - try - result_df = create_forecast_output( - input, results, tmpdir, output_type; - save_output = true - ) - - @test isa(result_df, DataFrame) - @test size(result_df, 1) == 2 - @test all(result_df.location .== "CA") - @test all(result_df.target .== "wk inc covid hosp") - - # Check file was saved - csv_files = filter(f -> endswith(f, ".csv"), readdir(tmpdir)) - @test length(csv_files) == 1 - finally - rm(tmpdir, recursive = true) - end - end - - @testset "PipelineOutput writes samples parquet with Date column" begin - input = EpiAutoGPInput( - [Date("2024-01-01")], - [100.0], - "COVID-19", - "CA", - "nhsn", - "epiweekly", - "observed", - Date("2024-01-01"), - Date[], - Vector{Real}[] - ) - - forecast_dates = [Date("2024-01-08"), Date("2024-01-15")] - forecasts = reshape([10.0, 20.0, 30.0, 40.0], 2, 2) - results = (forecast_dates = forecast_dates, forecasts = forecasts) - - tmpdir = mktempdir() - try - result_df = create_forecast_output( - input, results, tmpdir, PipelineOutput(); - save_output = true - ) - - parquet_path = joinpath(tmpdir, "samples.parquet") - @test isfile(parquet_path) - @test eltype(result_df.date) == Date - @test eltype(result_df[!, Symbol(".draw")]) == Int32 - @test propertynames(result_df) == [ - :date, - Symbol(".value"), - Symbol(".draw"), - Symbol(".variable"), - :resolution, - :geo_value, - :disease, - ] - - 
con = DBInterface.connect(DuckDB.DB, ":memory:") - try - read_df = DataFrame( - DBInterface.execute( - con, - "SELECT * FROM read_parquet($(EpiAutoGP._quote_duckdb_string(parquet_path)))" - ) - ) - - @test eltype(read_df.date) == Date - @test eltype(read_df[!, Symbol(".draw")]) <: Integer - @test propertynames(read_df) == propertynames(result_df) - @test read_df.date == forecast_dates[[1, 2, 1, 2]] - @test read_df[!, Symbol(".draw")] == [1, 1, 2, 2] - @test read_df[!, Symbol(".value")] == [10.0, 20.0, 30.0, 40.0] - @test all( - read_df[!, Symbol(".variable")] .== - "observed_hospital_admissions" - ) - @test all(read_df.resolution .== "epiweekly") - @test all(read_df.geo_value .== "CA") - @test all(read_df.disease .== "COVID-19") - finally - DBInterface.close(con) - end - finally - rm(tmpdir, recursive = true) - end - end -end diff --git a/EpiAutoGP/test/test_parse_arguments.jl b/EpiAutoGP/test/test_parse_arguments.jl deleted file mode 100644 index 098cc6525..000000000 --- a/EpiAutoGP/test/test_parse_arguments.jl +++ /dev/null @@ -1,29 +0,0 @@ -@testset "parse_arguments tests" begin - @testset "argument parsing with defaults" begin - # Mock command line arguments with only required arguments - test_args = [ - "--json-input", "path/to/json", - "--output-dir", "/path/to/output", - ] - - old_args = copy(ARGS) - try - empty!(ARGS) - append!(ARGS, test_args) - - parsed = parse_arguments() - - # Test required arguments - @test parsed["json-input"] == "path/to/json" - @test parsed["output-dir"] == "/path/to/output" - - # Test key default values - @test parsed["n-ahead"] == 8 - @test parsed["transformation"] == "boxcox" - - finally - empty!(ARGS) - append!(ARGS, old_args) - end - end -end diff --git a/pipelines/epiautogp/Manifest.toml b/pipelines/epiautogp/Manifest.toml new file mode 100644 index 000000000..245716320 --- /dev/null +++ b/pipelines/epiautogp/Manifest.toml @@ -0,0 +1,862 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version 
= "1.11.9" +manifest_format = "2.0" +project_hash = "619951374b6b2d55af993e5506c102c02e73b94a" + +[[deps.ANSIColoredPrinters]] +git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" +uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" +version = "0.0.1" + +[[deps.AbstractTrees]] +git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.5" + +[[deps.AliasTables]] +deps = ["PtrArrays", "Random"] +git-tree-sha1 = "9876e1e164b144ca45e9e3198d0b689cadfed9ff" +uuid = "66dad0bd-aa9a-41b7-9441-69ab47430ed8" +version = "1.1.3" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.AutoGP]] +deps = ["CSV", "DataFrames", "Dates", "Distributions", "DocStringExtensions", "Documenter", "Gen", "LinearAlgebra", "Match", "Parameters", "Printf", "Random", "Serialization", "Statistics"] +git-tree-sha1 = "07339f63ff606e21383473b4d15fd51f360cf9d0" +uuid = "6eb593e7-dfb4-4e48-b98e-d7222cdf0053" +version = "0.1.19" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.BitIntegers]] +deps = ["Random"] +git-tree-sha1 = "091d591a060e43df1dd35faab3ca284925c48e46" +uuid = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" +version = "0.3.7" + +[[deps.BoxCox]] +deps = ["DocStringExtensions", "LinearAlgebra", "NLopt", "PrecompileTools", "Printf", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] +git-tree-sha1 = "476d410f5029b22814bfcba9c213abbb51c1ad94" +uuid = "1248164d-f7a6-4bdb-8e8d-8c4a187b3ce6" +version = "0.3.8" + + [deps.BoxCox.extensions] + BoxCoxMakieExt = "Makie" + BoxCoxMixedModelsExt = ["MixedModels", "Tables"] + BoxCoxStatsModelsExt = ["StatsModels", "Tables"] + + [deps.BoxCox.weakdeps] + Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" + StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" + Tables = 
"bd369af6-aec1-5ad0-b16a-f7cc5008161c" + +[[deps.CEnum]] +git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.5.0" + +[[deps.CSV]] +deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] +git-tree-sha1 = "8d8e0b0f350b8e1c91420b5e64e5de774c2f0f4d" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.10.16" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "12177ad6b3cad7fd50c8b3825ce24a99ad61c18f" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.26.1" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "962834c22b66e32aa10f7611c08c8ca4e20749a9" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.8" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "9d8a54ce4b17aa5bdce0ea5c34bc5e7c340d16ad" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.18.1" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.Crayons]] +git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" +uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +version = "4.1.1" + +[[deps.DBInterface]] +git-tree-sha1 = "a444404b3f94deaa43ca2a58e18153a82695282b" +uuid = "a10d1c49-ce27-4219-8d33-6db1a4562965" +version = "2.6.1" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" 
+uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.16.0" + +[[deps.DataFrames]] +deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "5fab31e2e01e70ad66e3e24c968c264d1cf166d6" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "1.8.2" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "4e1fe97fdaed23e9dc21d4d664bea76b65fc50a0" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.22" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.Distributions]] +deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] +git-tree-sha1 = "e421c1938fafab0165b04dc1a9dbe2a26272952c" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.25.125" + + [deps.Distributions.extensions] + DistributionsChainRulesCoreExt = "ChainRulesCore" + DistributionsDensityInterfaceExt = "DensityInterface" + DistributionsTestExt = "Test" + + [deps.Distributions.weakdeps] + 
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.DocStringExtensions]] +git-tree-sha1 = "7442a5dfe1ebb773c29cc2962a8980f47221d76c" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.5" + +[[deps.Documenter]] +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"] +git-tree-sha1 = "56e9c37b5e7c3b4f080ab1da18d72d5c290e184a" +uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +version = "1.17.0" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.DuckDB]] +deps = ["DBInterface", "Dates", "DuckDB_jll", "FixedPointDecimals", "Tables", "UUIDs", "WeakRefStrings"] +git-tree-sha1 = "656133510fa02a4f70a9d3ce6c1d083318406550" +uuid = "d2f5444f-75bc-4fdf-ac35-56f514c445e1" +version = "1.5.2" + +[[deps.DuckDB_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4f4bc0e8be87d6ab270a07caa182808958bff9fe" +uuid = "2cbbab25-fc8b-58cf-88d4-687a02676033" +version = "1.5.2+0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8f05e9a2e7c2e3eb524102bb2926c5743c07fbe1" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.8.0+0" + +[[deps.FilePathsBase]] +deps = ["Compat", "Dates"] +git-tree-sha1 = "3bab2c5aa25e7840a4b065805c0cdfc01f3068d2" +uuid = "48062228-2e41-5def-b9a4-89aafe57970f" +version = "0.9.24" +weakdeps = ["Mmap", "Test"] + + [deps.FilePathsBase.extensions] + FilePathsBaseMmapExt = "Mmap" + FilePathsBaseTestExt = "Test" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.FillArrays]] +deps = 
["LinearAlgebra"] +git-tree-sha1 = "2f979084d1e13948a3352cf64a25df6bd3b4dca3" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.16.0" +weakdeps = ["PDMats", "SparseArrays", "StaticArrays", "Statistics"] + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStaticArraysExt = "StaticArrays" + FillArraysStatisticsExt = "Statistics" + +[[deps.FixedPointDecimals]] +deps = ["BitIntegers", "Parsers"] +git-tree-sha1 = "41d3a5de0eab320cc04833a373f0fcb3640073d5" +uuid = "fb4d412d-6eee-574d-9565-ede6634db7b0" +version = "0.6.5" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "afb7c51ac63e40708a3071f80f5e84a752299d4f" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.39" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.FunctionWrappers]] +git-tree-sha1 = "d62485945ce5ae9c0c48f124a84998d755bae00e" +uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" +version = "1.1.3" + +[[deps.FunctionalCollections]] +deps = ["Test"] +git-tree-sha1 = "04cb9cfaa6ba5311973994fe3496ddec19b6292a" +uuid = "de31a74c-ac4f-5751-b3fd-e18cd04993ca" +version = "0.5.0" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" + +[[deps.Gen]] +deps = ["Compat", "DataStructures", "Distributions", "ForwardDiff", "FunctionalCollections", "JSON", "LinearAlgebra", "MacroTools", "Parameters", "Random", "ReverseDiff", "SpecialFunctions"] +git-tree-sha1 = "2c87c7885d3ea7637847e3b7682abefde6eae45e" +uuid = "ea4f424c-a589-11e8-07c0-fd5c91b9da4a" +version = "0.4.8" + +[[deps.Git]] +deps = ["Git_LFS_jll", "Git_jll", "JLLWrappers", "OpenSSH_jll"] +git-tree-sha1 = "824a1890086880696fc908fe12a17bcf61738bd8" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = 
"1.5.0" + +[[deps.Git_LFS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "bb8471f313ed941f299aa53d32a94ab3bee08844" +uuid = "020c3dae-16b3-5ae5-87b3-4cb189e250b2" +version = "3.7.0+0" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "0dd4cfb426924210c8f42742751cbde74b27bfa3" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.54.0+0" + +[[deps.HypergeometricFunctions]] +deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] +git-tree-sha1 = "68c173f4f449de5b438ee67ed0c9c748dc31a2ec" +uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" +version = "0.3.28" + +[[deps.IOCapture]] +deps = ["Logging", "Random"] +git-tree-sha1 = "0ee181ec08df7d7c911901ea38baf16f755114dc" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "1.0.0" + +[[deps.InlineStrings]] +git-tree-sha1 = "8f3d257792a522b4601c24a577954b0a8cd7334d" +uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" +version = "1.4.5" + + [deps.InlineStrings.extensions] + ArrowTypesExt = "ArrowTypes" + ParsersExt = "Parsers" + + [deps.InlineStrings.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.InvertedIndices]] +git-tree-sha1 = "6da3c4316095de0f5ee2ebd875df8721e7e0bdbe" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.3.1" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "b2d91fe939cae05960e760110b328288867b5758" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.6" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7204148362dafe5fe6a273f855b8ccbe4df8173e" +uuid = 
"692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.8.0" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.JSON3]] +deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"] +git-tree-sha1 = "411eccfe8aba0814ffa0fdf4860913ed09c34975" +uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +version = "1.14.3" + + [deps.JSON3.extensions] + JSON3ArrowExt = ["ArrowTypes"] + + [deps.JSON3.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.4.0" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "0f2da712350b020bc3957f269c9caad516383ee0" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.3.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.6.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.2+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "be484f5c92fad0bd8acfef35fe017900b0b73809" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.18.0+0" + +[[deps.LinearAlgebra]] 
+deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "13ca9e2586b89836fd20cccf56e57e2b9ae7f38f" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.29" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.MacroTools]] +git-tree-sha1 = "1e0228a030642014fe5cfe68c2c0a818f9e3f522" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.16" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = "93c718d892e73931841089cdc0e982d6dd9cc87b" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.3" + +[[deps.Match]] +deps = ["MacroTools", "OrderedCollections"] +git-tree-sha1 = "58c5c5db26f2f0512facb359991410b7b5982c38" +uuid = "7eb4fadd-790c-5f42-8a69-bfa0b872bfbf" +version = "2.4.1" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+0" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.2.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.12.12" + +[[deps.NLopt]] 
+deps = ["CEnum", "NLopt_jll"] +git-tree-sha1 = "624785b15005a0e0f4e462b27ee745dbe5941863" +uuid = "76087f3c-5699-56af-9a33-bf431cd00edd" +version = "1.2.1" + + [deps.NLopt.extensions] + NLoptMathOptInterfaceExt = ["MathOptInterface"] + + [deps.NLopt.weakdeps] + MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" + +[[deps.NLopt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "b0154a615d5b2b6cf7a2501123b793577d0b9950" +uuid = "079eb43e-fd8e-5478-9966-2cf3e3edb778" +version = "2.10.0+0" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "9b8215b1ee9e78a293f99797cd31375471b2bcae" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.1.3" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.NowcastAutoGP]] +deps = ["AutoGP", "BoxCox", "Dates", "LogExpFunctions"] +git-tree-sha1 = "f01f3a1a0ae6c98450c179cc627a05d8ee358c9b" +repo-rev = "v0.3.0" +repo-url = "https://github.com/CDCgov/NowcastAutoGP.git" +uuid = "7e9f7f4b-f590-4c14-8324-de4fcbed18f7" +version = "0.3.0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.27+1" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.5+0" + +[[deps.OpenSSH_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll", "Zlib_jll"] +git-tree-sha1 = "57baa4b81a24c2910afbb6d853aa0685e4312bf7" +uuid = "9bd350c2-7e96-507f-8002-3f2e150b4e1b" +version = "10.3.1+0" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "2ac022577e5eac7da040de17776d51bb770cd895" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.5.6+0" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1346c9208249809840c91b26703912dff463d335" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = 
"0.5.6+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "05868e21324cede2207c6f0f466b4bfef6d5e7ee" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.8.1" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + +[[deps.PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "e4cff168707d441cd6bf3ff7e4832bdf34278e4a" +uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.11.37" +weakdeps = ["StatsBase"] + + [deps.PDMats.extensions] + StatsBaseExt = "StatsBase" + +[[deps.Parameters]] +deps = ["OrderedCollections", "UnPack"] +git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.12.3" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "5d5e0a78e971354b1c7bff0655d11fdc1b0e12c8" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.4" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + REPLExt = "REPL" + +[[deps.PooledArrays]] +deps = ["DataAPI", "Future"] +git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "1.4.3" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "8b770b60760d4451834fe79dd483e318eee709c4" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.5.2" + +[[deps.PrettyTables]] +deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "REPL", "Reexport", "StringManipulation", "Tables"] +git-tree-sha1 = 
"624de6279ab7d94fc9f672f0068107eb6619732c" +uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +version = "3.3.2" + + [deps.PrettyTables.extensions] + PrettyTablesTypstryExt = "Typstry" + + [deps.PrettyTables.weakdeps] + Typstry = "f0ed7684-a786-439e-b1e3-3b82803b501e" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.PtrArrays]] +git-tree-sha1 = "4fbbafbc6251b883f4d2705356f3641f3652a7fe" +uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d" +version = "1.4.0" + +[[deps.QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "5e8e8b0ab68215d7a2b14b9921a946fee794749e" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.11.3" + + [deps.QuadGK.extensions] + QuadGKEnzymeExt = "Enzyme" + + [deps.QuadGK.weakdeps] + Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" + +[[deps.ReverseDiff]] +deps = ["ChainRulesCore", "DiffResults", "DiffRules", "ForwardDiff", "FunctionWrappers", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "Random", "SpecialFunctions", "StaticArrays", "Statistics"] +git-tree-sha1 = "f1b07322a8cdc0d46812473b37fb72f69ec07b22" +uuid = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +version = "1.16.2" + +[[deps.Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "5b3d50eb374cea306873b371d3f8d3915a018f0b" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version 
= "0.9.0" + +[[deps.Rmath_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "58cdd8fb2201a6267e1db87ff148dd6c1dbd8ad8" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.5.1+0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "ebe7e59b37c400f694f52b58c93d26201387da70" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.9" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "64d974c2e6fdf07f8155b5b2ca2ffa9069b608d9" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.2" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.11.0" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "2700b235561b0335d5bef7097a111dc513b8655e" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.7.2" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "246a8bb2e6667f832eea063c3a56aef96429a3db" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.18" +weakdeps = ["ChainRulesCore", "Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "6ab403037779dae8c514bad259f32a447262455a" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.4" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = 
"ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "178ed29fd5b2a2cfc3bd31c13375ae925623ff36" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.8.0" + +[[deps.StatsBase]] +deps = ["AliasTables", "DataAPI", "DataStructures", "IrrationalConstants", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "aceda6f4e598d331548e04cc6b2124a6148138e3" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.34.10" + +[[deps.StatsFuns]] +deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] +git-tree-sha1 = "91f091a8716a6bb38417a6e6f274602a19aaa685" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "1.5.2" + + [deps.StatsFuns.extensions] + StatsFunsChainRulesCoreExt = "ChainRulesCore" + StatsFunsInverseFunctionsExt = "InverseFunctions" + + [deps.StatsFuns.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.StringManipulation]] +deps = ["PrecompileTools"] +git-tree-sha1 = "d05693d339e37d6ab134c5ab53c29fce5ee5d7d5" +uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" +version = "0.4.4" + +[[deps.StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "159331b30e94d7b11379037feeb9b690950cace8" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.11.0" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = 
"bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.7.0+0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "f2c1efbc8f3a609aadf318094f8fc5204bdaf344" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.12.1" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.WeakRefStrings]] +deps = ["DataAPI", "InlineStrings", "Parsers"] +git-tree-sha1 = "0716e01c3b40413de5dedbc9c5c69f27cddfddfc" +uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" +version = "1.4.3" + +[[deps.WorkerUtilities]] +git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" +uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" +version = "1.6.1" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.11.0+0" + +[[deps.nghttp2_jll]] +deps = 
["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.59.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" diff --git a/pipelines/epiautogp/Project.toml b/pipelines/epiautogp/Project.toml new file mode 100644 index 000000000..84e9836b1 --- /dev/null +++ b/pipelines/epiautogp/Project.toml @@ -0,0 +1,20 @@ +[deps] +DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1" +JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +NowcastAutoGP = "7e9f7f4b-f590-4c14-8324-de4fcbed18f7" +StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" + +[sources] +NowcastAutoGP = {rev = "v0.3.0", url = "https://github.com/CDCgov/NowcastAutoGP.git"} + +[compat] +DBInterface = "2.6.1" +DataFrames = "1.8.2" +Dates = "1.11.0" +DuckDB = "1.5.2" +JSON3 = "1.14.3" +NowcastAutoGP = "0.3.0" +StructTypes = "1.11.0" diff --git a/pipelines/epiautogp/fit_epiautogp.jl b/pipelines/epiautogp/fit_epiautogp.jl new file mode 100644 index 000000000..2a5487384 --- /dev/null +++ b/pipelines/epiautogp/fit_epiautogp.jl @@ -0,0 +1,401 @@ +#!/usr/bin/env julia + +using Dates, DataFrames, JSON3, StructTypes +using DBInterface: close, connect, execute +using DuckDB: DB, register_data_frame +using NowcastAutoGP: + create_nowcast_data, + create_transformed_data, + forecast, + forecast_with_nowcasts, + get_transformations, + make_and_fit_model + +""" +Input data structure for EpiAutoGP pipeline, +representing the JSON input format expected by the pipeline. 
+"""
+struct EpiAutoGPInput
+    # Observation dates; indexed in parallel with `reports`.
+    dates::Vector{Date}
+    # Observed values, one per entry of `dates`; validated as finite and non-negative.
+    reports::Vector{Float64}
+    # Copied into the `disease` column of the output.
+    pathogen::String
+    # Copied into the `geo_value` column of the output.
+    location::String
+    # "nhsn" selects the hospital-admissions output variable; any other value
+    # defers to `ed_visit_type`.
+    target::String
+    # "daily" or "epiweekly"; sets the forecast step to one day or one week.
+    frequency::String
+    # "observed", "other" or "pct"; only consulted when `target` is not "nhsn".
+    ed_visit_type::String
+    # First forecast date (horizon 0).
+    forecast_date::Date
+    # Dates excluded from the base-model fit; must be sorted when non-empty.
+    nowcast_dates::Vector{Date}
+    # Nowcast scenarios; each inner vector has one value per entry of `nowcast_dates`.
+    nowcast_reports::Vector{Vector{Float64}}
+end
+
+# Register the struct with StructTypes so JSON3 builds it from its named fields.
+# Both names must be qualified: `using StructTypes` does not allow extending
+# `StructTypes.StructType`, so an unqualified definition would only create an
+# unrelated function in this module that JSON3 never consults.
+StructTypes.StructType(::Type{EpiAutoGPInput}) = StructTypes.Struct()
+
+# Defaults for the optional command-line arguments, keyed by hyphenated name.
+const DEFAULT_ARGS = Dict{String, Any}(
+    "n-ahead" => 8,
+    "n-particles" => 24,
+    "n-mcmc" => 100,
+    "n-hmc" => 50,
+    "n-forecast-draws" => 2000,
+    "transformation" => "boxcox",
+    "smc-data-proportion" => 0.1,
+)
+# Arguments without a default; `parse_arguments` throws if either is absent.
+const REQUIRED_ARGS = Set(["json-input", "output-dir"])
+# Argument names grouped by the type their raw string value is converted to.
+const INT_ARGS = Set(
+    [
+        "n-ahead",
+        "n-particles",
+        "n-mcmc",
+        "n-hmc",
+        "n-forecast-draws",
+    ]
+)
+const FLOAT_ARGS = Set(["smc-data-proportion"])
+const STRING_ARGS = Set(["json-input", "output-dir", "transformation"])
+# Every name accepted on the command line.
+const VALID_ARGS = union(keys(DEFAULT_ARGS), REQUIRED_ARGS)
+
+"""
+Convert the raw string `value` of argument `name` to the type registered for
+that name in `INT_ARGS`, `FLOAT_ARGS` or `STRING_ARGS`.
+
+Throws an `ArgumentError` when the value cannot be parsed as the expected
+number type or when `name` belongs to none of the three sets.
+"""
+function _parse_argument_value(name::String, value::String)
+    if name in INT_ARGS
+        parsed = tryparse(Int, value)
+        isnothing(parsed) && throw(ArgumentError("--$name must be an integer, got '$value'"))
+        return parsed
+    elseif name in FLOAT_ARGS
+        parsed = tryparse(Float64, value)
+        isnothing(parsed) && throw(ArgumentError("--$name must be a float, got '$value'"))
+        return parsed
+    elseif name in STRING_ARGS
+        return value
+    end
+
+    throw(ArgumentError("Unknown argument --$name"))
+end
+
+"""
+Parse command-line arguments into a `Dict{String, Any}` keyed by hyphenated
+argument name, starting from a copy of `DEFAULT_ARGS`.
+
+Both `--name value` and `--name=value` forms are accepted, and underscores in
+names are normalised to hyphens (`--n_ahead` is read as `n-ahead`). Throws an
+`ArgumentError` for a token that does not start with `--`, an unknown name, a
+missing or unparsable value, or an absent required argument.
+"""
+function parse_arguments(args = ARGS)
+    parsed_args = copy(DEFAULT_ARGS)
+    i = firstindex(args)
+
+    while i <= lastindex(args)
+        token = args[i]
+        startswith(token, "--") ||
+            throw(ArgumentError("Expected argument beginning with '--', got '$token'"))
+
+        raw_name_value = token[3:end]
+        equals_index = findfirst(isequal('='), raw_name_value)
+        if isnothing(equals_index)
+            # `--name value`: the value is the next token, so consume two.
+            name = replace(raw_name_value, "_" => "-")
+            i == lastindex(args) &&
+                throw(ArgumentError("Missing value for argument --$name"))
+            value = args[i + 1]
+            i += 2
+        else
+            # `--name=value`: split on the first '=' only, so the value may
+            # itself contain '='.
+            name = replace(raw_name_value[begin:(equals_index - 1)], "_" => "-")
+            value = raw_name_value[(equals_index + 1):end]
+            i += 1
+        end
+
+        name in VALID_ARGS || throw(ArgumentError("Unknown argument --$name"))
+        parsed_args[name] = _parse_argument_value(name, value)
+    end
+
+    for name in REQUIRED_ARGS
+        haskey(parsed_args, name) ||
+            throw(ArgumentError("Missing required argument --$name"))
+    end
+
+    return parsed_args
+end
+
+"""
+JSON reader that parses the input JSON file into an EpiAutoGPInput struct.
+"""
+function read_data(path_to_json::String)
+    return JSON3.read(read(path_to_json, String), EpiAutoGPInput)
+end
+
+################ Validation and preparation of input data for modeling
+
+# Throw an `ArgumentError` carrying `message` unless `condition` holds.
+function _require(condition::Bool, message::String)
+    condition || throw(ArgumentError(message))
+    return nothing
+end
+
+# Accept only the two reporting frequencies the forecast step logic handles.
+function _validate_frequency(data::EpiAutoGPInput)
+    valid_frequencies = ["daily", "epiweekly"]
+    _require(
+        data.frequency in valid_frequencies,
+        "Frequency must be one of $(valid_frequencies), got '$(data.frequency)'",
+    )
+    return nothing
+end
+
+# Require every element of `values` to be finite and >= 0; `label` names the
+# offending collection in the error message.
+function _validate_nonnegative_finite_values(values; label::String)
+    for (i, value) in enumerate(values)
+        _require(
+            isfinite(value) && value >= 0,
+            "$label[$i] must be a non-negative finite number, got $value",
+        )
+    end
+    return nothing
+end
+
+# Check that the observed series is non-empty, aligned and non-negative.
+function _validate_observations(data::EpiAutoGPInput)
+    _require(
+        !isempty(data.dates) && !isempty(data.reports),
+        "Empty data: dates and reports cannot be empty",
+    )
+    # `dates` and `reports` are indexed in parallel when the stable data are
+    # selected, so reject a mismatch here instead of letting it surface later
+    # as a BoundsError or as silently ignored trailing reports.
+    _require(
+        length(data.dates) == length(data.reports),
+        "Mismatched lengths: dates has $(length(data.dates)) entries but reports has $(length(data.reports))",
+    )
+    _validate_nonnegative_finite_values(data.reports; label = "reports")
+    return nothing
+end
+
+# Check the optional nowcast inputs: reports require dates, dates must be
+# sorted, and every scenario must supply one valid value per nowcast date.
+function _validate_nowcasts(data::EpiAutoGPInput)
+    _require(
+        !isempty(data.nowcast_dates) || isempty(data.nowcast_reports),
+        "Nowcast reports cannot be provided without nowcast dates",
+    )
+
+    isempty(data.nowcast_dates) && return nothing
+
+    _require(
+        issorted(data.nowcast_dates),
+        "Nowcast dates must be sorted chronologically",
+    )
+
+    for (i, report_vec) in enumerate(data.nowcast_reports)
+        _require(
+            length(report_vec) == length(data.nowcast_dates),
+            "nowcast_reports[$i] must have length $(length(data.nowcast_dates)), got $(length(report_vec))",
+        )
+        _validate_nonnegative_finite_values(report_vec; label = "nowcast_reports[$i]")
+    end
+
+    return nothing
+end
+
+"""
+Validates the input data for the EpiAutoGP pipeline.
+
+Runs the frequency, observation and nowcast checks in that order and returns
+`data` unchanged; the first failing check throws an `ArgumentError`.
+"""
+function validate_input(data::EpiAutoGPInput)
+    _validate_frequency(data)
+    _validate_observations(data)
+    _validate_nowcasts(data)
+    return data
+end
+
+"""
+Read the JSON file at `path_to_json` and return the validated `EpiAutoGPInput`.
+"""
+function read_and_validate_data(path_to_json::String)
+    return validate_input(read_data(path_to_json))
+end
+
+################## Preparation of data for modelling and forecasting ###############
+
+"""
+Prepares the input data for modelling by:
+  - Excluding nowcast dates from the stable data used to fit the base model
+  - Creating nowcast data structures if nowcast dates are provided
+  - Generating `n_ahead + 1` forecast dates, starting at the forecast date and
+    stepping by one day or one week according to the frequency
+  - Determining the forward and inverse data transformations
+  - Splitting the requested number of forecast draws across nowcast scenarios
+"""
+function prepare_for_modelling(
+        input::EpiAutoGPInput,
+        transformation_name::String,
+        n_ahead::Int,
+        n_forecasts::Int,
+    )
+    stable_data_idxs = findall(date -> !(date in input.nowcast_dates), input.dates)
+    stable_data_dates = input.dates[stable_data_idxs]
+    stable_data_values = input.reports[stable_data_idxs]
+
+    # The transformation is derived from all reports, including any that fall
+    # on nowcast dates.
+    transformation, inv_transformation =
+        get_transformations(transformation_name, Float64.(input.reports))
+
+    nowcast_data = isempty(input.nowcast_dates) ?
+        nothing :
+        create_nowcast_data(input.nowcast_reports, input.nowcast_dates; transformation)
+
+    time_step = input.frequency == "epiweekly" ? Week(1) : Day(1)
+    # Horizons 0..n_ahead inclusive, so the forecast date itself is included.
+    forecast_dates = [input.forecast_date + i * time_step for i in 0:n_ahead]
+
+    n_forecasts_per_nowcast = isnothing(nowcast_data) ?
+        n_forecasts :
+        max(1, n_forecasts ÷ length(nowcast_data))
+
+    return (;
+        stable_data_dates,
+        stable_data_values,
+        nowcast_data,
+        forecast_dates,
+        n_forecasts_per_nowcast,
+        transformation,
+        inv_transformation,
+    )
+end
+
+"""
+Transform the stable observations (those not expected to be revised by
+nowcasts) and fit the ensemble GP model to them, returning the fitted model.
+
+The keyword defaults only matter to direct callers: the pipeline entry point
+passes all four tuning values explicitly from the parsed arguments. Note that
+the `n_mcmc` default here (50) differs from the command-line default (100).
+"""
+function fit_base_model(
+        dates::Vector{Date},
+        values::Vector{Float64};
+        transformation,
+        n_particles::Int = 24,
+        smc_data_proportion::Float64 = 0.1,
+        n_mcmc::Int = 50,
+        n_hmc::Int = 50,
+    )
+    fit_data = create_transformed_data(dates, values; transformation = transformation)
+    return make_and_fit_model(fit_data; n_particles, smc_data_proportion, n_mcmc, n_hmc)
+end
+
+"""
+Draw forecasts from the fitted base model for the requested dates.
+
+This method handles the case where nowcast data are available: forecasts are
+generated for each nowcast scenario via `forecast_with_nowcasts` and the
+results are pooled.
+"""
+function _do_forecasts(
+        nowcast_data,
+        base_model,
+        forecast_dates,
+        n_forecasts_per_nowcast::Int;
+        inv_transformation,
+    )
+    return forecast_with_nowcasts(
+        base_model, nowcast_data, forecast_dates, n_forecasts_per_nowcast;
+        inv_transformation,
+    )
+end
+
+# Method selected when there are no nowcasts (`nowcast_data === nothing`):
+# forecast directly from the base model.
+function _do_forecasts(
+        nowcast_data::Nothing,
+        base_model,
+        forecast_dates,
+        n_forecasts_per_nowcast::Int;
+        inv_transformation,
+    )
+    return forecast(
+        base_model, forecast_dates, n_forecasts_per_nowcast;
+        inv_transformation,
+    )
+end
+
+"""
+Run the full modelling step for one validated input: prepare the data, fit the
+base model on the stable observations and draw forecasts. `args` is the
+dictionary produced by `parse_arguments`. Returns a named tuple with
+`forecast_dates` and `forecasts`.
+"""
+function forecast_with_nowcastautogp(input::EpiAutoGPInput, args::Dict{String, Any})
+    model_info = prepare_for_modelling(
+        input,
+        args["transformation"],
+        args["n-ahead"],
+        args["n-forecast-draws"],
+    )
+    base_model = fit_base_model(
+        model_info.stable_data_dates,
+        model_info.stable_data_values;
+        transformation = model_info.transformation,
+        n_particles = args["n-particles"],
+        smc_data_proportion =
args["smc-data-proportion"],
+        n_mcmc = args["n-mcmc"],
+        n_hmc = args["n-hmc"],
+    )
+    forecasts = _do_forecasts(
+        model_info.nowcast_data,
+        base_model,
+        model_info.forecast_dates,
+        model_info.n_forecasts_per_nowcast;
+        inv_transformation = model_info.inv_transformation,
+    )
+
+    return (; forecast_dates = model_info.forecast_dates, forecasts = forecasts)
+end
+
+################## Formatting and saving forecast output ###############
+
+"""
+Reshape forecast draws into a long `DataFrame` with columns `date`, `.value`
+and `.draw`.
+
+Each column of `results.forecasts` is treated as one draw and paired row-wise
+with `results.forecast_dates`. Draws are numbered from 1 in column order and
+stored as `Int32`; the per-draw frames are concatenated in that order.
+"""
+function create_forecast_df(results::NamedTuple)
+    return mapreduce(vcat, enumerate(eachcol(results.forecasts))) do (draw, sampled_values)
+        DataFrame(
+            :date => results.forecast_dates,
+            Symbol(".value") => sampled_values,
+            Symbol(".draw") => fill(Int32(draw), length(sampled_values)),
+        )
+    end
+end
+
+# Render `value` as a single-quoted SQL string literal, doubling any embedded
+# single quotes so the path cannot terminate the literal early.
+function _quote_duckdb_string(value::AbstractString)
+    return "'" * replace(value, "'" => "''") * "'"
+end
+
+"""
+Write `table` to `path` as a Parquet file using an in-memory DuckDB database.
+
+The table is registered under the name `forecast_samples` and exported with a
+`COPY ... (FORMAT parquet)` statement. The connection is closed in the
+`finally` clause, so it is released even when registration or the copy throws.
+"""
+function _write_parquet_with_duckdb(path::AbstractString, table)
+    con = connect(DB, ":memory:")
+    return try
+        register_data_frame(con, table, "forecast_samples")
+        execute(
+            con,
+            "COPY forecast_samples TO $(_quote_duckdb_string(path)) (FORMAT parquet)",
+        )
+    finally
+        close(con)
+    end
+end
+
+"""
+Build the pipeline-format forecast table and optionally save it.
+
+Adds the `.variable`, `resolution`, `geo_value` and `disease` columns to the
+long draws table. When `save_output` is true the table is written to
+`samples.parquet` inside `output_dir`. Returns the `DataFrame` either way.
+An `ed_visit_type` outside "observed", "other" and "pct" raises a `KeyError`
+when `target` is not "nhsn".
+"""
+function create_forecast_output(
+        input::EpiAutoGPInput,
+        results::NamedTuple,
+        output_dir::String;
+        save_output::Bool,
+    )
+    forecast_df = create_forecast_df(results)
+
+    # "nhsn" always maps to hospital admissions; every other target picks the
+    # ED-visit variable from `ed_visit_type`.
+    variable_name = if input.target == "nhsn"
+        "observed_hospital_admissions"
+    else
+        Dict(
+            "observed" => "observed_ed_visits",
+            "other" => "other_ed_visits",
+            "pct" => "prop_disease_ed_visits",
+        )[input.ed_visit_type]
+    end
+
+    # NOTE(review): presumably converts percentages (0-100) to proportions
+    # (0-1) to match the `prop_disease_ed_visits` variable -- confirm upstream units.
+    if input.ed_visit_type == "pct" && input.target == "nssp"
+        forecast_df[!, Symbol(".value")] = forecast_df[!, Symbol(".value")] ./ 100.0
+    end
+
+    forecast_df[!, Symbol(".variable")] .= variable_name
+    forecast_df[!, :resolution] .= input.frequency
+    forecast_df[!, :geo_value] .= input.location
+    forecast_df[!, :disease] .= input.pathogen
+
+    if save_output
+        parquet_path = joinpath(output_dir, "samples.parquet")
+
mkpath(dirname(parquet_path))
+        _write_parquet_with_duckdb(parquet_path, forecast_df)
+        println("Saved pipeline forecast samples to $parquet_path")
+    end
+
+    return forecast_df
+end
+
+"""
+Command-line entry point: parse the arguments, read and validate the input
+JSON, fit the model and forecast, then write `samples.parquet` to the output
+directory.
+
+On any exception a header line, the error and its backtrace are printed to
+`stderr` before the exception is rethrown.
+"""
+function main()
+    return try
+        args = parse_arguments()
+        input_data = read_and_validate_data(args["json-input"])
+        results = forecast_with_nowcastautogp(input_data, args)
+        create_forecast_output(
+            input_data,
+            results,
+            args["output-dir"];
+            save_output = true,
+        )
+    catch e
+        println(stderr, "NowcastAutoGP pipeline run failed:")
+        showerror(stderr, e, catch_backtrace())
+        println(stderr)
+        rethrow()
+    end
+end
+
+# Run the pipeline only when this file is executed as a script, not when it is
+# loaded with `include`.
+if abspath(PROGRAM_FILE) == @__FILE__
+    main()
+end
diff --git a/pipelines/epiautogp/forecast_epiautogp.py b/pipelines/epiautogp/forecast_epiautogp.py
index 31f04adfb..8a9bc7d69 100644
--- a/pipelines/epiautogp/forecast_epiautogp.py
+++ b/pipelines/epiautogp/forecast_epiautogp.py
@@ -9,7 +9,6 @@ from pipelines.utils.cli_utils import add_common_forecast_arguments
 
 from pipelines.utils.common_utils import (
     parse_exclude_date_ranges,
-    run_julia_code,
     run_julia_script,
 )
 
@@ -21,7 +20,7 @@ def run_epiautogp_forecast(
     execution_settings: dict,
 ) -> None:
     """
-    Run EpiAutoGP forecasting model using Julia.
+    Run the EpiAutoGP forecasting model using the direct NowcastAutoGP Julia script.
 
     Parameters
     ----------
@@ -54,22 +53,12 @@ def run_epiautogp_forecast(
 
     Notes
     -----
-    This function sets up the EpiAutoGP Julia environment and runs the
-    forecasting script. The output is saved to model_dir.
+    This function runs the direct NowcastAutoGP Julia script. The output is
+    saved to model_dir.
     """
     # Ensure output directory exists
     model_dir.mkdir(parents=True, exist_ok=True)
 
-    # Instantiate julia environment for EpiAutoGP
-    run_julia_code(
-        """
-        using Pkg
-        Pkg.activate("EpiAutoGP")
-        Pkg.instantiate()
-        """,
-        function_name="setup_epiautogp_environment",
-    )
-
     # Add path arguments to pass to EpiAutoGP
     params["json-input"] = str(json_input_path)
     params["output-dir"] = str(model_dir)
@@ -82,7 +71,7 @@ def run_epiautogp_forecast(
 
     # Run Julia script
     run_julia_script(
-        "EpiAutoGP/run.jl",
+        "pipelines/epiautogp/fit_epiautogp.jl",
         args_to_epiautogp,
         executor_flags=executor_flags,
         function_name="run_epiautogp_forecast",
@@ -233,7 +222,7 @@ def main(
         "smc_data_proportion": smc_data_proportion,
     }
     execution_settings = {
-        "project": "EpiAutoGP",
+        "project": "pipelines/epiautogp",
         "threads": n_threads,
     }
 
diff --git a/pipelines/tests/test_epiautogp_parquet.py b/pipelines/tests/test_epiautogp_parquet.py
new file mode 100644
index 000000000..fa1d76e55
--- /dev/null
+++ b/pipelines/tests/test_epiautogp_parquet.py
@@ -0,0 +1,84 @@
+"""Checks for the direct NowcastAutoGP Julia runner parquet output."""
+
+import datetime as dt
+import json
+from pathlib import Path
+
+import polars as pl
+import pytest
+
+from pipelines.utils.common_utils import run_julia_script
+
+
+def _write_synthetic_input(path: Path) -> None:
+    """Write a small synthetic runner input JSON file to ``path``.
+
+    The series holds 36 daily values starting 2024-01-01 (a 7-day cycle plus a
+    linear trend), targets NSSP percentage ED visits, forecasts from
+    2024-02-06 and supplies no nowcasts.
+    """
+    start_date = dt.date(2024, 1, 1)
+    dates = [start_date + dt.timedelta(days=i) for i in range(36)]
+    reports = [12.0 + (i % 7) * 0.4 + i * 0.05 for i in range(len(dates))]
+    input_data = {
+        "dates": [date.isoformat() for date in dates],
+        "reports": reports,
+        "pathogen": "COVID-19",
+        "location": "US",
+        "target": "nssp",
+        "frequency": "daily",
+        "ed_visit_type": "pct",
+        "forecast_date": dt.date(2024, 2, 6).isoformat(),
+        "nowcast_dates": [],
+        "nowcast_reports": [],
+    }
+    path.write_text(json.dumps(input_data), encoding="utf-8")
+
+
+def test_direct_nowcastautogp_runner_writes_pipeline_parquet(tmp_path) -> None:
+    """Run the Julia script end to end and check the schema of ``samples.parquet``."""
+    input_path = tmp_path / "epiautogp-input.json"
+    output_dir = tmp_path / "model-fit"
+    _write_synthetic_input(input_path)
+
+    # Sampler settings are kept tiny so the fit is fast; only the shape and
+    # schema of the output are checked, not forecast quality.
+    try:
+        run_julia_script(
+            "pipelines/epiautogp/fit_epiautogp.jl",
+            [
+                f"--json-input={input_path}",
+                f"--output-dir={output_dir}",
+                "--n-ahead=2",
+                "--n-particles=2",
+                "--n-mcmc=1",
+                "--n-hmc=1",
+                "--n-forecast-draws=4",
+                "--transformation=percentage",
+                "--smc-data-proportion=0.5",
+            ],
+            executor_flags=["--project=pipelines/epiautogp", "--startup-file=no"],
+            function_name="test_direct_nowcastautogp_runner_writes_pipeline_parquet",
+            text=True,
+        )
+    except FileNotFoundError:
+        # NOTE(review): presumably raised when the `julia` executable is not on
+        # PATH -- confirm against run_julia_script.
+        pytest.skip("julia is not available")
+
+    samples_path = output_dir / "samples.parquet"
+    assert samples_path.is_file()
+
+    samples = pl.read_parquet(samples_path)
+    assert samples.schema["date"] == pl.Date
+    assert samples.schema[".draw"] == pl.Int32
+    # --n-ahead=2 gives horizons 0, 1 and 2, so each of the 4 draws contributes
+    # three consecutive daily rows, grouped by draw.
+    assert samples["date"].to_list() == [
+        dt.date(2024, 2, 6),
+        dt.date(2024, 2, 7),
+        dt.date(2024, 2, 8),
+        dt.date(2024, 2, 6),
+        dt.date(2024, 2, 7),
+        dt.date(2024, 2, 8),
+        dt.date(2024, 2, 6),
+        dt.date(2024, 2, 7),
+        dt.date(2024, 2, 8),
+        dt.date(2024, 2, 6),
+        dt.date(2024, 2, 7),
+        dt.date(2024, 2, 8),
+    ]
+    assert samples[".draw"].to_list() == [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
+    assert samples[".variable"].unique().to_list() == ["prop_disease_ed_visits"]
+    assert samples["resolution"].unique().to_list() == ["daily"]
+    assert samples["geo_value"].unique().to_list() == ["US"]
+    assert samples["disease"].unique().to_list() == ["COVID-19"]
+    # The runner divides NSSP "pct" values by 100, so the written values are
+    # expected to be proportions.
+    assert samples[".value"].min() >= 0.0
+    assert samples[".value"].max() <= 1.0
diff --git a/pipelines/tests/test_epiautogp_parquet_interop.py b/pipelines/tests/test_epiautogp_parquet_interop.py
index 3427d2343..7493446fe 100644
--- a/pipelines/tests/test_epiautogp_parquet_interop.py
+++ b/pipelines/tests/test_epiautogp_parquet_interop.py
@@ -8,13 +8,13 @@ Those draw IDs are `.draw` in `samples.parquet`; the hubverse conversion
 renames them to `output_type_id`, which is where the CI
failure surfaced. -The fixtures intentionally do not run full forecasting models. Instead, they -build tiny but realistic model-fit directories for each producer family: -EpiAutoGP samples from Julia, fable-style samples from R, and PyRenew-style -posterior predictive output from Python that is converted by the real PyRenew R -sample conversion script. The final test then runs the same hubverse conversion -and batch combine functions used by CI so producer schema drift is caught before -postprocessing reaches the expensive end-to-end workflow. +The fixtures build tiny but realistic model-fit directories for each producer +family: EpiAutoGP samples from the direct NowcastAutoGP Julia runner, +fable-style samples from R, and PyRenew-style posterior predictive output from +Python that is converted by the real PyRenew R sample conversion script. The +final test then runs the same hubverse conversion and batch combine functions +used by CI so producer schema drift is caught before postprocessing reaches the +expensive end-to-end workflow. 
""" import datetime as dt @@ -30,18 +30,17 @@ from pipelines.utils.common_utils import ( model_fit_dir_to_hub_tbl, - run_julia_code, + run_julia_script, run_r_code, run_r_script, ) from pipelines.utils.postprocess_forecast_batches import combine_hubverse_tables -EXPECTED_DATES = [ +FORECAST_DATES = [ dt.date(2024, 2, 4), - dt.date(2024, 2, 10), - dt.date(2024, 2, 4), - dt.date(2024, 2, 10), + dt.date(2024, 2, 5), ] +EXPECTED_DATES = FORECAST_DATES * 2 EXPECTED_DRAWS = [1, 1, 2, 2] @@ -74,62 +73,50 @@ def _skip_if_r_packages_missing(*packages: str) -> None: pytest.skip(f"R packages are not available: {exc}") +def _write_epiautogp_input(path: Path) -> None: + start_date = dt.date(2024, 1, 1) + dates = [start_date + dt.timedelta(days=i) for i in range(36)] + reports = [12.0 + (i % 7) * 0.4 + i * 0.05 for i in range(len(dates))] + input_data = { + "dates": [date.isoformat() for date in dates], + "reports": reports, + "pathogen": "COVID-19", + "location": "US", + "target": "nssp", + "frequency": "daily", + "ed_visit_type": "pct", + "forecast_date": FORECAST_DATES[0].isoformat(), + "nowcast_dates": [], + "nowcast_reports": [], + } + path.write_text(json.dumps(input_data), encoding="utf-8") + + @pytest.fixture(scope="module") def epiautogp_interop_paths(tmp_path_factory) -> Iterator[EpiAutoGPInteropPaths]: tmp_dir = tmp_path_factory.mktemp("epiautogp-parquet-interop") try: batch_dir = tmp_dir / "covid-19_r_2024-02-03_f_2024-01-01_t_2024-02-01" - model_fit_dir = batch_dir / "model_runs" / "US" / "epiautogp_nhsn_epiweekly" - forecast_dates = list(dict.fromkeys(EXPECTED_DATES)) - assert len(EXPECTED_DATES) % len(forecast_dates) == 0 - draw_count = len(EXPECTED_DATES) // len(forecast_dates) - assert EXPECTED_DATES == forecast_dates * draw_count - - forecast_dates_julia = ", ".join( - f'Date("{forecast_date.isoformat()}")' for forecast_date in forecast_dates - ) - forecasts_julia = "; ".join( - " ".join( - str(float(100 * (date_index + 1) + draw_index + 1)) - for draw_index in 
range(draw_count) - ) - for date_index in range(len(forecast_dates)) - ) - julia_code = textwrap.dedent( - f""" - using Dates - using EpiAutoGP - - input = EpiAutoGPInput( - [Date("2024-01-01")], - [100.0], - "COVID-19", - "US", - "nhsn", - "epiweekly", - "observed", - Date("2024-02-03"), - Date[], - Vector{{Real}}[] - ) - results = ( - forecast_dates = [{forecast_dates_julia}], - forecasts = [{forecasts_julia}], - ) + model_fit_dir = batch_dir / "model_runs" / "US" / "epiautogp_nssp_daily_pct" + input_path = tmp_dir / "epiautogp-input.json" + _write_epiautogp_input(input_path) - create_forecast_output( - input, - results, - {_quote_for_embedded_code(model_fit_dir)}, - PipelineOutput(); - save_output = true - ) - """ - ) try: - run_julia_code( - julia_code, - executor_flags=["--project=EpiAutoGP", "--startup-file=no"], + run_julia_script( + "pipelines/epiautogp/fit_epiautogp.jl", + [ + f"--json-input={input_path}", + f"--output-dir={model_fit_dir}", + "--n-ahead=1", + "--n-particles=2", + "--n-mcmc=1", + "--n-hmc=1", + "--n-forecast-draws=2", + "--transformation=percentage", + "--smc-data-proportion=0.5", + ], + executor_flags=["--project=pipelines/epiautogp", "--startup-file=no"], + function_name="run_epiautogp_interop_fixture", text=True, ) except FileNotFoundError: