-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_generation.R
106 lines (90 loc) · 2.5 KB
/
data_generation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Generalized Poisson AR(1) - GPAR(1) Simulation
#
# X_t = S_t(X_{t-1}) + e_t, t=1, 2 , ...
#
# e_t ~ GP(q*mu, xi)
# S_t(.) ~ QB(alpha, xi/mu, .)
# X_t ~ GP(mu, xi)
#
# Constraints:
# 0 < alpha < 1
# mu > 0
# xi >= 0
library(gp)
graphics.off() # close all plots
rm(list=ls()) # clear the (global) environment variables
cat("\014") # clear the console (Ctrl+L)
set.seed(40)
# Print auxiliary function
printf <- function(...) {
x = paste(sprintf(...),"\n")
return(cat(x))
}
# Process parameters and outliers
alpha <- 0.5 # autocorrelation parameter
mu <- 2
xi <- 0.5 # dispersion parater
q <- 1-alpha # auxiliary variable
tau <- c(7, 26, 60, 90, 91) # outliers location
eta <- 6 # outliers magnitude
filename <- "s9" # csv file to save the data
# Theoretical mean, variance and sd
mean_x <- mu/(1-xi)
var_x <- mu/(1-xi)^3
sd_x <- sqrt(var_x)
# Simulation period
T <- 120
# Quasi-binomial thinning pmf
dqb <- function(x, alpha, xi, n){
q <- 1-alpha
num <- alpha*q*choose(n,x) * (alpha + x*xi)^(x-1) * (q + (n-x)*xi)^(n-x-1)
den <- (1 + n*xi)^(n-1)
y <- num/den
return(y)
}
# Quasi-binomial thinning cdf
pqb <- function(x, alpha, xi, n){
x_ <- 0:x
y <- sum(sapply(x_, dqb, alpha=alpha, xi=xi, n=n))
return(y)
}
# Draw from quasi-binomial
rqb <- function(size, alpha, xi, n){
x <- 0:n
w <- sapply(x, dqb, alpha=alpha, xi=xi, n=n)
y <- sample(x, size=size, replace=TRUE, prob=w)
return(y)
}
# GPAR(1) simulation
x <- NULL
x[1] <- mean_x
for (t in 2:T){
# Innovations
e <- rgp(n=2, q*mu, xi, method="Branching")[1]
# Quasi-binomial thinning operator
s <- rqb(1, alpha, xi/mu, x[t-1])
x[t] <- s + e
}
# Insert the outliers
y <- x
y[tau] <- y[tau] + eta
#####
# Theoretical vs simulated mean and variance
printf("Theoretical mean: %.2f", mean_x)
printf("Sample mean: %.2f", mean(x))
printf("Theoretical var: %.2f", var_x)
printf("Theoretical sd: %.2f", sd_x)
printf("Sample var: %.2f", var(x))
printf("mu + 2*sd: %.2f", mean_x + 2*sd_x)
printf("mu + 5*sd: %.2f", mean_x + 5*sd_x)
#####
# Plot
par(mar = c(2, 2, 0.2, 0.2))
plot(y, type='s')
points(tau, y[tau], col="red")
abline(h=mean_x, col="gray", lty=2)
abline(h=mean_x + 3*sd_x, col="gray", lty=3)
legend(x="topright", lty=c(2,3), col="gray", legend=c("E[X]", "E[X] + 3sigma[X]"))
#####
# Save the data
write.table(y, file = paste("data/", filename, ".csv", sep=""), row.names=FALSE, col.names=FALSE)