-
Notifications
You must be signed in to change notification settings - Fork 1
/
gefs4cast-snapshot.R
96 lines (76 loc) · 2.87 KB
/
gefs4cast-snapshot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#renv::restore()
## CRON-job to update the recent GEFS parquet files
## Resumes from the last date on record (that date is fetched again, followed by every later day)
# WARNING: needs >= GDAL 3.4.x
#remotes::install_github("eco4cast/gefs4cast")
library(gefs4cast)
library(purrr)
library(dplyr)
# Load ~/.Renviron explicitly so the script also runs under littler.
readRenviron("~/.Renviron")
print(paste0("Start: ", Sys.time()))

# ---- Destination bucket ----
# Clear the AWS region/endpoint variables and disable the EC2 metadata
# lookup before opening the bucket on its custom endpoint.
Sys.unsetenv("AWS_DEFAULT_REGION")
Sys.unsetenv("AWS_S3_ENDPOINT")
Sys.setenv(AWS_EC2_METADATA_DISABLED = "TRUE")
s3 <- arrow::s3_bucket(
  "neon4cast-drivers",
  endpoint_override = "data.ecoforecast.org"
)

# ---- Threads ----
# Thread count passed to every noaa_gefs() call further down.
# Adjust between 70 - 1120 depending on available RAM, CPU, + bandwidth.
threads <- 100

# ---- What is already stored? ----
# The entries listed under stage1/0 are parsed as dates; the most recent one
# is the date this run resumes from.
gefs <- s3$path("noaa/gefs-v12/stage1/0")
stored_days <- as.Date(basename(gefs$ls()))
start <- max(stored_days, na.rm = TRUE)
#start <- as.Date("2022-10-08")
#have_cycles <- basename(gefs$ls(start))

# ---- What has NOAA published? ----
# Top-level entries look like "gefs.YYYYMMDD"; anything that does not parse
# becomes NA and is ignored by max() / which.max().
aws <- arrow::s3_bucket("noaa-gefs-pds", anonymous = TRUE)
avail <- aws$ls()
days <- as.Date(gsub("^gefs\\.(\\d{8})", "\\1", avail), format = "%Y%m%d")
avail_day <- max(days, na.rm = TRUE)
newest <- which.max(days)
avail_cycles <- basename(aws$ls(avail[newest]))

# ---- Completeness check (hackish) ----
# A cycle folder can be listed before it holds its data. Compare the number
# of pgrb2ap5 entries in the newest cycle against the same cycle of the entry
# listed immediately before the newest day; if the counts differ, treat the
# newest cycle as incomplete and drop it.
# NOTE(review): the "previous day" is found by position (newest - 1), which
# assumes the listing is ordered by date -- confirm.
newest_cycle <- max(avail_cycles)
latest_path <- paste(avail[newest], newest_cycle, "atmos", "pgrb2ap5", sep = "/")
previous_path <- paste(avail[newest - 1], newest_cycle, "atmos", "pgrb2ap5", sep = "/")
n_latest <- length(aws$ls(latest_path))
n_previous <- length(aws$ls(previous_path))
if (n_latest != n_previous) {
  avail_cycles <- avail_cycles[-length(avail_cycles)]
}
# ---- Fetch whatever is missing ----
# Uses values computed earlier in this script:
#   start        - most recent date already present in the destination bucket
#   avail_day    - most recent date published by NOAA
#   avail_cycles - cycles of avail_day that passed the completeness check
#   threads, s3  - thread count and destination bucket for noaa_gefs()
#
# walk() is used instead of map() because noaa_gefs() is called only for its
# side effect; this also avoids printing lists of return values to the log.

# Fetch one date/cycle with noaa_gefs()'s default horizon and GDAL options.
fetch_full <- function(date, cycle = "00") {
  noaa_gefs(date, cycle = cycle, threads = threads, s3 = s3)
}

# Fetch the given cycles for the given dates, limited to max_horizon = 6 and
# with empty gdal_ops. Iterates cycle-major: every date for the first cycle,
# then every date for the next cycle, and so on.
fetch_short <- function(dates, cycles) {
  walk(cycles, function(cy) {
    walk(dates, function(d) {
      noaa_gefs(d, cycle = cy, max_horizon = 6,
                threads = threads, s3 = s3, gdal_ops = "")
    })
  })
}

if (start <= avail_day - 1) {
  # At least one full day behind: (re)fetch every day from `start` up to the
  # day before the newest published day.
  full_dates <- seq(start, avail_day - 1, by = 1)
  walk(full_dates, fetch_full)
  fetch_short(full_dates, c("06", "12", "18"))

  # Then the newest published day. Only cycles that survived the completeness
  # check are fetched, each exactly once. Previously "00" was fetched
  # unconditionally and then a second time via avail_cycles, which both
  # duplicated the download and ignored the completeness check.
  # NOTE(review): non-"00" cycles here use noaa_gefs() defaults, whereas the
  # other code paths pass max_horizon = 6 and gdal_ops = "" -- confirm that
  # this difference is intended.
  walk(avail_cycles, function(cy) fetch_full(avail_day, cycle = cy))
} else if (start == avail_day) {
  # Already on the newest published day: fetch the cycles available for it.
  # NOTE(review): cycles already stored are not filtered out, so every
  # available cycle is fetched again on each run.
  if (length(avail_cycles) == 0) {
    message("Up to date.")
  } else {
    if ("00" %in% avail_cycles) {
      fetch_full(start)
    }
    fetch_short(start, avail_cycles[avail_cycles != "00"])
  }
}

print(paste0("End: ", Sys.time()))