#----------------------------------------------------------------------------------------------------------
# Name: EV_RP.r
# Purpose: This main function estimates return periods using the block maxima/minima approach
# on gridded data. All external modules are called within this script.
# Author: Francesco Tonini
# Email: [email protected]
# Created: 11/1/2009
# Copyright: (c) 2009 by Francesco Tonini
# License: GNU General Public License (GPL)
# Software: Tested successfully using R-2.14.0 64-bit version (http://www.r-project.org/)
#----------------------------------------------------------------------------------------------------------
##Define the main working directory
##Always use either / or \\ to specify the path
mainDir <- 'C:/Temp'
#mainDir <- '~/Desktop/Temp' #if you are on a MacOSX environment
##Let's set the working directory
setwd(file.path(mainDir))
##Define the subdirectory(ies) where your output files will be saved
subDir <- 'Output'
##Create the subdirectory folder(s) where your output will be saved
##If the directory already exists a warning is issued, BUT we can suppress it using showWarnings = FALSE
dir.create(file.path(mainDir, subDir), showWarnings = FALSE)
##Use an external source file with all modules (functions) used within this script.
##Use the FULL PATH if the source file is not in the same folder as this script
source('myEVfunctions.r')
##Path to the folder in which you want to save all your output files
workdir_Output <- file.path(mainDir, subDir)
##Load all required libraries
print('Loading required libraries...')
load.packages()
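##NOTE: load.packages() is defined in myEVfunctions.r. As a rough, hypothetical sketch, it is
##assumed to load (and, if needed, install) the packages this script relies on: 'ismev' for
##gev.fit()/gum.fit() and 'tcltk'/'tcltk2' for the progress bar GUI. Something along these lines:
#load.packages <- function(){
#	pkgs <- c('ismev', 'tcltk', 'tcltk2')
#	for (p in pkgs){
#		if (!require(p, character.only = TRUE)) install.packages(p)
#		library(p, character.only = TRUE)
#	}
#}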
##Let's set the simulation parameters:
##Alamata.tot is the dataset we built for the Alamata woreda, Tigray (Ethiopia).
##This can be replaced by an entire region or country (the larger the extent, the longer the code will take to run)
dataset <- Alamata.tot
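##NOTE: Alamata.tot must already exist in the workspace (e.g. loaded from an .RData file or read
##in with read.csv) before running this script. Judging from how it is used below, it is assumed
##to be a long-format data frame with at least these columns:
##  id     - integer pixel identifier (1..npixels)
##  year   - observation year
##  decads - dekad label whose first 3 characters identify the month (e.g. 'Apr1', 'Apr2', 'Apr3')
##  val    - ADVI value recorded for that pixel and dekad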
##Number of pixels within the chosen dataset
npixels <- length(unique(dataset$id))
##Let's specify the return level(s) for which we want to calculate the associated probability of exceedance
##(i.e. return period)
return_levels <- c(-0.15,-0.05)
##Let's create a data frame to store the parameters of the GEV/Gumbel distributions and their confidence intervals
tab.parameters <- data.frame(matrix(NA,npixels,11))
colnames(tab.parameters) <- c('id.pix','gum.flag','mu','sig','shp','mu.inf','mu.sup','sig.inf','sig.sup','shp.inf','shp.sup')
##Let's create some labels for the exceedance probability estimates associated with each return level
RP_labels <- paste('prob_',return_levels,sep='')
##Let's create a data frame to store the probability of exceedance estimated for each return level
tab.prob <- data.frame(matrix(NA, npixels, length(return_levels) + 1))
colnames(tab.prob) <- c("id.pix",RP_labels)
##Create an empty vector in which we will save the pixel IDs
##whose GEV/Gumbel parameter estimates did not converge
px.idWarning <- c()
######################################################################
##Flagged values (in DN) by VITO for SPOT VGT S10
#251=missing, 252=cloud, 253=snow, 254=sea, 255=background
#Converted to ADVI values using: ADVI = -1.25 + 0.01 * DN
#1.26=missing, 1.27=cloud, 1.28=snow, 1.29=sea, 1.30=background
######################################################################
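##Quick arithmetic check of the conversion for the flagged codes:
## -1.25 + 0.01 * c(251, 252, 253, 254, 255)  gives  1.26 1.27 1.28 1.29 1.30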
##Create a Tk window element as a container
root <- tktoplevel()
tktitle(root) <- 'Extreme Value Analysis'
##Define the label of a progress bar
lab <- tk2label(root)
##Define the length and create a progress bar
pb <- tk2progress(root, length = 300)
##Configure the progress bar range
tkconfigure(pb, value=0, maximum = npixels-1)
##Pack all labels and progress bars inside the Tk container
tkpack(lab)
tkpack(pb)
##Refresh the Tk window
tcl('update')
##LOOP through each pixel, create its time series of maxima/minima,
##estimate GEV parameters and return levels
for (pix in 1:npixels){
#update progress bar according to the pixel currently being processed
tkconfigure(lab, text = paste('Pixel',pix,'Out of',npixels, ' (',round(pix/npixels*100),'% )'))
tkconfigure(pb, value = pix - 1)
tcl('update')
##Slice the main dataset by extracting only historical records for the current pixel
pix.dataset <- dataset[dataset$id == pix,]
##This section was developed specifically for the case of having dekadal (1 dekad = 10 days)
##time series and extracting monthly (1 month = 3 dekads) MINIMA.
##NOTE: If you are interested in taking monthly MAXIMA just use FUN = max
##NOTE2: If you want to take YEARLY minima/maxima then you should use:
##etiq <- factor(pix.dataset$year) ...BUT make sure the time series is at least 30 years long,
##otherwise neither the GEV parameter estimates nor the return level estimates will be reliable.
##Identify the 'dekadal' variable in the dataset
Dekads <- pix.dataset$decads
##Identify the 'Year' variable in the dataset
Years <- pix.dataset$year
##Identify the 'Value' variable in the dataset
Values <- pix.dataset$val
##Let's store the labels (as factors) of all existing dekad-year pairs (regardless of the dekad number)
etiq <- factor( paste(substring(Dekads,1,3), Years, sep='') )
##Let's extract the minima (use FUN = max if you want maxima) for each unique level of the factor
monthExtract <- tapply(Values, etiq, FUN = min)
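##For example (hypothetical labels): with Dekads = c('Apr1','Apr2','Apr3') and Years = 1998,
##the three dekads collapse into the single factor level 'Apr1998', and tapply() returns the
##smallest of the three dekadal values as the April 1998 monthly minimum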
##Since values in 'monthExtract' are sorted alphabetically by month (Apr1998,Apr1999,Apr2000,etc.)
##we need to reorder them so that the series of values follows the real time line
##If you are taking YEARLY minima/maxima, skip steps (1-2-3) and use only (4), because
##years already have a natural ascending order:
#(1) by taking unique factor levels, we have a natural time line order
UniqueEtiq <- unique(etiq)
#(2) let's now match the correct ordered position of unique factors with our monthly values
PosIdx <- match(as.character(UniqueEtiq), names(monthExtract))
#(3) now that we have all the correct positions, let's put all our values in a numerical vector. This
#is the time series to be used with extreme value models
px.min <- as.vector(monthExtract[PosIdx])
#(4) Uncomment the next line and use it to build your YEARLY maxima/minima time series
#px.min <- as.vector(monthExtract)
#If the pixel is coded as 1.29=sea or 1.30=background we simply store 999 and skip to the next pixel
if ( any(px.min == 1.29) | any(px.min == 1.30) ) {
tab.parameters[pix,] <- 999
tab.prob[pix,] <- 999
next
}
#Let's now look at the values 1.26=missing, 1.27=cloud, 1.28=snow. They need to be removed from the
#numerical vector, otherwise the extreme value models will be biased. Since in our study area there are
#very few or no cloud, snow, or missing values, we can either remove them from the numerical vector, or
#replace them with a simple average of the N previous/following values.
Flags <- c(1.26, 1.27, 1.28)
#replace flagged values with NAs (if there are any)
px.min[ px.min %in% Flags] <- NA
#if there are flagged values, count them and check whether they make up 15% or more of the time series
if (any(is.na(px.min))) {
NA.sum <- sum(is.na(px.min))
#print a warning on screen to let the user know that model estimates may be biased
if (NA.sum / length(px.min) * 100 >= 15) cat('\nATTENTION: 15% or more of the values are flagged.\nModel estimates may be biased')
}
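#As mentioned above, a (hypothetical) alternative to dropping the flagged values is to fill each
#NA with the mean of its immediate non-NA neighbours. Uncomment this minimal sketch to use it
#instead of the removal step just below:
#na.idx <- which(is.na(px.min))
#for (i in na.idx){
#	px.min[i] <- mean(px.min[max(1, i - 1):min(length(px.min), i + 1)], na.rm = TRUE)
#}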
#overwrite the original time series by removing any NA (flagged) values
px.min <- px.min[!is.na(px.min)]
#estimate the GEV model (if you are working with maxima, DO NOT use the '-' sign)
minimi.gev <- gev.fit(-px.min,show=F)
class(minimi.gev) <- "gev.fit"
#estimate the Gumbel model (if you are working with maxima, DO NOT use the '-' sign)
minimi.gum <- gum.fit(-px.min,show=F)
class(minimi.gum) <- "gum.fit"
#If the MLE estimation procedure does not converge, save the pixel ID into
#the warning vector
if (minimi.gev$conv > 0 | minimi.gum$conv > 0){
px.idWarning <- c(px.idWarning, pix)
}else{
# Likelihood ratio test to compare the two nested models (Gumbel vs. GEV)
lik.ratio <- 2 * (minimi.gum$nllh - minimi.gev$nllh)
pvalue <- (1 - pchisq(lik.ratio,1))
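#The Gumbel model is the GEV model with the shape parameter fixed at 0, so the two models are nested
#and, under the null hypothesis (shape = 0), lik.ratio approximately follows a chi-square distribution
#with 1 degree of freedom. Equivalently, the Gumbel model is rejected at the 5% level whenever
#lik.ratio > qchisq(0.95, 1), i.e. roughly 3.84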
#if the p-value is not significant (> 0.05) the shape parameter does not significantly differ from 0, so we use the Gumbel distr.
if (pvalue > 0.05){
#use a variable to flag the use of a Gumbel distr.
gum.flag <- 1
mu <- minimi.gum$mle[1] #location
sig <- minimi.gum$mle[2] #scale
shp <- 0 #shape
#95% confidence intervals (normal approximation, ~ N(0,1))
mu.inf <- minimi.gum$mle[1] - 1.96 * minimi.gum$se[1]
mu.sup <- minimi.gum$mle[1] + 1.96 * minimi.gum$se[1]
sig.inf <- minimi.gum$mle[2] - 1.96 * minimi.gum$se[2]
sig.sup <- minimi.gum$mle[2] + 1.96 * minimi.gum$se[2]
shp.inf <- 0
shp.sup <- 0
#calculate probability of observing the desired return level(s)
#if you are using MAXIMA, go to the myEVfunctions.r file and adjust
#the code within the prob_fun(). Follow the comments.
prob <- prob_fun()
}else{ #otherwise use the GEV distr.
#use a variable to flag that the Gumbel distr. was NOT used
gum.flag <- 0
mu <- minimi.gev$mle[1] #location
sig <- minimi.gev$mle[2] #scale
shp <- minimi.gev$mle[3] #shape
#95% confidence intervals (normal approximation, ~ N(0,1))
mu.inf <- minimi.gev$mle[1] - 1.96 * minimi.gev$se[1]
mu.sup <- minimi.gev$mle[1] + 1.96 * minimi.gev$se[1]
sig.inf <- minimi.gev$mle[2] - 1.96 * minimi.gev$se[2]
sig.sup <- minimi.gev$mle[2] + 1.96 * minimi.gev$se[2]
shp.inf <- minimi.gev$mle[3] - 1.96 * minimi.gev$se[3]
shp.sup <- minimi.gev$mle[3] + 1.96 * minimi.gev$se[3]
#calculate probability of observing the desired return level(s)
#if you are using MAXIMA, go to the myEVfunctions.r file and adjust
#the code within the prob_fun(). Follow the comments.
prob <- prob_fun()
}
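#NOTE: prob_fun() is defined in myEVfunctions.r and appears to read mu, sig, shp and return_levels
#directly from the workspace. Because the models are fitted on the NEGATED minima, a plausible
#(hypothetical) sketch of the probability that a monthly minimum falls below a return level r is:
#prob_fun <- function(){
#	sapply(return_levels, function(r){
#		z <- (-r - mu) / sig
#		if (shp == 0) 1 - exp(-exp(-z)) else 1 - exp(-(1 + shp * z)^(-1/shp))
#	})
#}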
#let's store the parameter estimates and probabilities of exceedance in their respective data frames, in the row
#corresponding to the current pixel of the LOOP
tab.parameters[pix,] <- c(pix,gum.flag,round(mu,2),round(sig,2),round(shp,2),round(mu.inf,2),round(mu.sup,2),
round(sig.inf,2),round(sig.sup,2),round(shp.inf,2),round(shp.sup,2))
tab.prob[pix,] <- c(pix, prob)
}
}
#For those pixels whose estimates could not be computed (no convergence), use the median of the 4 nearest neighbors
if(length(px.idWarning) > 0) median.interp_RP()
#Save files to output folder
save_files_RP()
tkmessageBox(title = "Message", message = 'Done! Output Files Saved To Folder', icon = "info", type = "ok")
##Destroy the Tk window holding the progress bar
tkdestroy(root)