From e3c41f31a7f8291cc912f7c4624b3a6d3e06ed50 Mon Sep 17 00:00:00 2001 From: Cole Brokamp Date: Tue, 6 Feb 2024 08:54:57 -0500 Subject: [PATCH] use httr instead of httr2 for ability to set proxy config globally (#32) * use httr instead of httr2 for ability to set proxy config globally * justfile - release specific types of data; cchmc hpc example --- DESCRIPTION | 1 - R/merra.R | 26 ++++----- .../install_merra_from_source_on_cchmc_hpc.sh | 21 +++++++ justfile | 57 +++++-------------- man/get_merra_data.Rd | 2 + tests/testthat/test-merra-daily.R | 1 - tests/testthat/test-merra.R | 2 - 7 files changed, 48 insertions(+), 62 deletions(-) create mode 100644 inst/install_merra_from_source_on_cchmc_hpc.sh diff --git a/DESCRIPTION b/DESCRIPTION index 79ac2c1..eb4fb96 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,6 @@ Imports: terra, fs, httr, - httr2, tidync, rlang, tibble, diff --git a/R/merra.R b/R/merra.R index b2073af..45e9245 100644 --- a/R/merra.R +++ b/R/merra.R @@ -14,6 +14,8 @@ #' converted to micrograms per cubic meter ($ug/m^3$). #' - Total surface PM2.5 mass is calculated according to #' the formula in +#' Set a proxy to be used by all {httr} calls in the merra functions with `httr::set_config(httr::use_proxy( ... ))`; e.g. 
+#' `httr::set_config(httr::use_proxy("http://bmiproxyp.chmcres.cchmc.org", 80, Sys.getenv("CCHMC_USERNAME"), Sys.getenv("CCHMC_PASSWORD")))` #' @param x a vector of s2 cell identifers (`s2_cell` object) #' @param dates a list of date vectors for the MERRA data, must be the same length as `x` #' @param merra_year a character string that is the year for the merra data @@ -116,24 +118,20 @@ create_daily_merra_data <- function(merra_date) { tf <- tempfile(fileext = ".nc4") req_url <- fs::path("https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2", - "M2T1NXAER.5.12.4", - format(the_date, "%Y"), - format(the_date, "%m"), - paste0("MERRA2_400.tavg1_2d_aer_Nx.", format(the_date, "%Y%m%d")), - ext = "nc4" - ) + "M2T1NXAER.5.12.4", + format(the_date, "%Y"), + format(the_date, "%m"), + paste0("MERRA2_400.tavg1_2d_aer_Nx.", format(the_date, "%Y%m%d")), + ext = "nc4") if ((format(the_date, "%Y") == "2020" & format(the_date, "%m") == "09") || (format(the_date, "%Y") == "2021" & format(the_date, "%m") %in% c("06", "07", "08", "09"))) { req_url <- gsub("MERRA2_400.", "MERRA2_401.", req_url, fixed = TRUE) } - req_url |> - httr2::request() |> - httr2::req_auth_basic( - username = earthdata_secrets["EARTHDATA_USERNAME"], - password = earthdata_secrets["EARTHDATA_PASSWORD"] - ) |> - httr2::req_cache(tempdir()) |> - httr2::req_perform(path = tf) + httr::GET( + req_url, + httr::authenticate(user = earthdata_secrets["EARTHDATA_USERNAME"], password = earthdata_secrets["EARTHDATA_PASSWORD"]), + httr::write_disk(tf) + ) |> httr::stop_for_status() out <- tidync::tidync(tf) |> tidync::hyper_filter( diff --git a/inst/install_merra_from_source_on_cchmc_hpc.sh b/inst/install_merra_from_source_on_cchmc_hpc.sh new file mode 100644 index 0000000..598549a --- /dev/null +++ b/inst/install_merra_from_source_on_cchmc_hpc.sh @@ -0,0 +1,21 @@ +#BSUB -W 30:00 +#BSUB -n 2 +#BSUB -M 24000 +#BSUB -R "span[ptile=2]" +#BSUB -e install_merra.out +#BSUB -o install_merra.out + +module load singularity +export 
APPC_INSTALL_DATA_FROM_SOURCE=1 + +echo "A reminder to ensure CCHMC_USERNAME and CCHMC_PASSWORD environment variables are set (or are defined in a .env file) for curl proxy settings" + +for year in "2023" "2022" "2021" "2020" "2019" "2018" "2017"; +do + rm -f ~/.local/share/R/appc/merra_$year.rds + singularity exec ~/singr_latest.sif \ + Rscript \ + -e "if (file.exists('.env')) dotenv::load_dot_env()" \ + -e "httr::set_config(httr::use_proxy('http://bmiproxyp.chmcres.cchmc.org', 80, Sys.getenv('CCHMC_USERNAME'), Sys.getenv('CCHMC_PASSWORD')))" \ + -e "appc::install_merra_data('$year')" +done diff --git a/justfile b/justfile index 54975cd..bfec8cd 100644 --- a/justfile +++ b/justfile @@ -3,22 +3,10 @@ set dotenv-load pkg_version := `Rscript -e "cat(desc::desc_get('Version'))"` geomarker_folder := `Rscript -e "cat(tools::R_user_dir('appc', 'data'))"` -# document R package -document: - R -e "devtools::document()" - -# check R package -check: - R -e "devtools::check()" - # run tests without cached geomarker files docker_test: docker build -t appc . 
-# build documentation website -build_site: document - R -e "pkgdown::build_site(preview = TRUE, devel = TRUE)" - # make training data make_training_data: Rscript inst/make_training_data.R @@ -38,48 +26,29 @@ report: # install nei data from source and upload to github release release_nei_data: - rm -f "{{geomarker_folder}}/nei_2017.rds" - R --quiet -e \ - "devtools::load_all(); \ - options('appc_install_data_from_source' = TRUE); \ - install_nei_point_data('2017')" - gh release upload v{{pkg_version}} "{{geomarker_folder}}/nei_2017.rds" - rm -f "{{geomarker_folder}}/nei_2020.rds" - R --quiet -e \ - "devtools::load_all(); \ - options('appc_install_data_from_source' = TRUE); \ - install_nei_point_data('2020')" - gh release upload v{{pkg_version}} "{{geomarker_folder}}/nei_2020.rds" + for year in 2017 2020; do \ + rm -f "{{geomarker_folder}}"/nei_$year.rds; \ + APPC_INSTALL_DATA_FROM_SOURCE=1 Rscript -e "appc::install_nei_point_data('$year')"; \ + gh release upload v{{pkg_version}} "{{geomarker_folder}}"/nei_$year.rds; \ + done # install smoke data from source and upload to github release release_smoke_data: rm -f "{{geomarker_folder}}/smoke.rds" - R --quiet -e \ - "devtools::load_all(); \ - options('appc_install_data_from_source' = TRUE); \ - install_smoke_pm_data()" + APPC_INSTALL_DATA_FROM_SOURCE=1 Rscript -e "appc::install_smoke_pm_data()" gh release upload v{{pkg_version}} "{{geomarker_folder}}/smoke.rds" # install traffic data from source and upload to github release release_traffic_data: rm -f "{{geomarker_folder}}/hpms_f123_aadt.rds" - R --quiet -e \ - "devtools::load_all(); \ - options('appc_install_data_from_source' = TRUE); \ - options('timeout' = 3000); \ - install_traffic()" + APPC_INSTALL_DATA_FROM_SOURCE=1 Rscript -e "appc::install_traffic()" gh release upload v{{pkg_version}} "{{geomarker_folder}}/hpms_f123_aadt.rds" -# install merra data from source and upload to github release +# install merra data (2017-2023) from source and upload to github release release_merra_data: - export 
APPC_INSTALL_DATA_FROM_SOURCE=TRUE - rm "{{geomarker_folder}}/merra_2017.rds" - R -f -e "install_merra_data('2017')" - gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2017.rds" - rm "{{geomarker_folder}}/merra_2018.rds" - R -f -e "install_merra_data('2018')" - gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2018.rds" - rm "{{geomarker_folder}}/merra_2017.rds" - R -f -e "install_merra_data('2017')" - gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2017.rds" + for year in 2017 2018 2019 2020 2021 2022 2023; do \ + rm -f "{{geomarker_folder}}"/merra_$year.rds; \ + APPC_INSTALL_DATA_FROM_SOURCE=1 Rscript -e "appc::install_merra_data('$year')"; \ + gh release upload v{{pkg_version}} "{{geomarker_folder}}"/merra_$year.rds; \ + done diff --git a/man/get_merra_data.Rd b/man/get_merra_data.Rd index 0fb1faa..98c3666 100644 --- a/man/get_merra_data.Rd +++ b/man/get_merra_data.Rd @@ -47,6 +47,8 @@ around the contiguous US, averaged to daily values, and converted to micrograms per cubic meter ($ug/m^3$). \item Total surface PM2.5 mass is calculated according to the formula in \url{https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/FAQ/#Q4} +Set a proxy to be used by all {httr} calls in the merra functions with \code{httr::set_config(httr::use_proxy( ... ))}; e.g. 
+\code{httr::set_config(httr::use_proxy("http://bmiproxyp.chmcres.cchmc.org", 80, Sys.getenv("CCHMC_USERNAME"), Sys.getenv("CCHMC_PASSWORD")))} } } \examples{ diff --git a/tests/testthat/test-merra-daily.R b/tests/testthat/test-merra-daily.R index 9036b18..fe2eb38 100644 --- a/tests/testthat/test-merra-daily.R +++ b/tests/testthat/test-merra-daily.R @@ -1,7 +1,6 @@ if (file.exists(".env")) dotenv::load_dot_env() earthdata_secrets <- Sys.getenv(c("EARTHDATA_USERNAME", "EARTHDATA_PASSWORD"), unset = NA) skip_if(any(is.na(earthdata_secrets)), message = "no earthdata credentials found") -skip() test_that("getting daily merra from GES DISC works", { # "normal" pattern diff --git a/tests/testthat/test-merra.R b/tests/testthat/test-merra.R index fbcc109..9cba5d7 100644 --- a/tests/testthat/test-merra.R +++ b/tests/testthat/test-merra.R @@ -1,5 +1,3 @@ -skip() - test_that("get_merra_data works", { d <- list( "8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),