-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOdysseyMomentum.R
186 lines (150 loc) · 7.45 KB
/
OdysseyMomentum.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
##### ODYSSEY MOMENTUM #####
##### THE TELEMACHOS #####
# We will model the deterioration of a patient by formulating it as a Markov chain.
# We have the following possible actions:
# - No additional attention needed (DN)
# - For now, the upcoming actions will be indicated by performing the action "inspection" (IN),
# during the pilot we will differentiate between these actions.
# - Patients can fill in a questionnaire (A1) with corresponding cost c1
# - Patients can be seen by a "wijkverpleegkundige" (A2) with corresponding cost c2
# - Patients needs to be seen by the GP (A3) with corresponding cost c3
# - Patients needs to be seen by a specialist (A4) with corresponding cost c4
# - Patients need to go the emergency departement (ED) with corresponding cost c5
# We do not know the actual health status of the patient with certainty
# until the patient fills in the questionnaire or is seen by a health professional
# Due to this, the best approach is to model the problem as a partially observable Markov decision process (POMDP).
# We let theta denote the knowledge state. So theta_i,j denote the knowledge state that represents that the
# last observed health status is i, and that it is j time periods ago that this health status has been observed.
# Input
S <- c("1", "2", "3", "4", "5") # Five possible health states
m <- 4 # The number of health stats before the ED
A <- c("DN", "IN", "ED") # The three possible actions
# Cost parameters / Negative rewards
c_inspect <- 20
c_GP <- 50
c_ED <- 10000
N <- 50 # Maximum number of time periods between actions. (This is needed as the state space has to be bounded)
# For now we will assume patients with "Disease A" deteriorate according to a gamma process with parameters a and b.
# This will be adjusted when we get insight into the data.
a <- 0.02
b <- 0.05
L <- 1 # We normalize the deterioration level for which you enter state 5 to one.
dt <- 1 # Each time period in our model consists of one day.
# To find the thresholds corresponding to the other health statuses we discretize the gamma deterioration process.
# The discrete-time Markov chain is now described by the transition probability matrix P.
TPM_gammaprocess <- function(a, b, L, m, dt){
P <- matrix(0, nrow=m+1, ncol = m+1)
dx <- L/m
prob <- function(i, dx, dt, a, b){
(1/dx)*integrate(pgamma, lower = (i-1)*dx, upper =i*dx, shape = a*dt, scale = b)$value
}
prob = Vectorize(prob, "i")
q = prob(1:m, dx, dt, a, b)
p = diff(c(0,q))
for(i in 1:m){
P[i,i:m] = p[1:(m+1-i)]
P[i, m+1] = 1-sum(P[i,1:m])
}
P[m+1, m+1] = 1
P[P<0] = 0
return(P)
}
P <- TPM_gammaprocess(a, b, L, m, dt)
# Note that P is upper-diagonal and thus we assume patients cannot improve their health status without interference of a health professional.
# If data shows this assumption is wrong, this can easily be changed.
# We have to take into account that new patients join our system (and automatically enter class 1) and people leave the system (due to mortality).
# We make the assumption that the amount of patients joining and leaving the system is equal, such that our system size stays the same.
# Furthermore, for now, we assume that the patients that leave the system are uniformly distributed among the states and the total percentage of patients leaving is 1e-7.
P.test[2,2:5] <- P[2,2:5] - 1e-7/(3*4)
P.test[2,1] <- 1e-7/3
P.test[3,3:5] <- P[3,3:5] - 1e-7/(3*3)
P.test[3,1] <- 1e-7/3
P.test[4,4:5] <- P[4,4:5] - 1e-7/(3*2)
P.test[4,1] <- 1e-7/3
P <- P.test
# Creating the knowledge states theta
theta <- array(NA, dim = c(m, N+1, length(S)))
q <- array(NA, dim = c(m, N+1)) # Probability of going to the ED when choosing DN in state theta_{i,j}
for(i in 1:m){
theta[i,1,] <- c(rep(0,i-1), 1, rep(0,m-i+1)) # Calculating the knowledge state of the patients that had their last inspection today.
# We know their health status with certainty.
for(j in 1:N){
theta[i,j+1,] <- c(theta[i,j,]%*%P[,1:m]/sum(theta[i,j,]%*%P[,1:m]),0) # Calculating the knowledge states for all patients that did not have
# an inspection today. Hence, answer the question:
# what is likely to be their current health status?
q[i,j] <- 1 - sum(theta[i,j,]%*%P[,1:m]) # Probability of going to the ED when choosing DN in state theta_{i,j}
}
q[i, N+1] <- 1 - sum(theta[i,N+1,]%*%P[,1:m])
}
# Now we move to definining the different transaction probabilities per action.
P.new <- array(0, dim = c(length(A), m*(N+1), m*(N+1)))
# Transition matrix when choosing DN
# Note that we have got m*(N+1) possible states
# We use the following order of the states: c(theta^{1,0}, theta^{1,1}, ..., theta^{2,0}, theta^{2,1}, ...)
# DN is possible in all states except at theta^{i,N}
# You go back to theta^{1,0} if failure occurs, otherwise you continue to the next knowledge state
for(i in 1:(m*(N+1))){
P.new[1,i,3] <- as.vector(t(q))[i] # For now, we assume a patient coming out of the ED will be
# a class 3 patient the rest of the time period until the next inspection.
for(j in 2:(m*(N+1))){
if(j==i+1)
P.new[1,i,j] <- 1 - as.vector(t(q))[i] # If you do not inspect the patient and he did not come to the ED themselves,
# the next knowledge state is theta^{i, j + 1}
}
if((i%%(N+1) == 0) == 1){
P.new[1,i,] <- 0
}
}
# Transition matrix when choosing IN
# A1 is possible in all states except theta^{i,0}
# You move to one of the theta^{i,0} as after IN you know the precise health status of the patient
# To simplify the calculations we first put our knowledge states in a matrix
theta.matrix <- c()
for(i in 1:m){
for(j in 1:(N+1)){
theta.matrix <- rbind(theta.matrix,theta[i,j,])
}
}
for(j in seq(1,m*(N+1), by = N+1)){
P.new[2,,j] <- theta.matrix[,which(seq(1,m*(N+1), by = N+1) == j)]
P.new[2,j,] <- 0
}
# Furthermore, there is a probability that this action improves the current health state of the patient.
# This will be incoorporated during the pilot.
# Transition matrix when choosing ED
# The new health status is random:
P.new[3,,2] <- 0.2
P.new[3,,3] <- 0.6
P.new[3,,4] <- 0.2
# The Value Iteration Algorithm
epsilon <- 0.001
v0 <- rep(0, m*(N+1)) # Initial value vector
val.iter.algo <- function(epsilon, A, v0, c_inspect, c_GP, c_ED, P.new){
n <- 0
v <- matrix(v0, ncol = m*(N+1), nrow = 1)
span <- 2*epsilon
while(span>epsilon){
n = n+1
v_all = matrix(nrow=m*(N+1), ncol=length(A))
# Cost-to-go
v_all[,1] = P.new[1,,] %*% v[n,] + as.vector(t(q*c_ED))
v_all[,2] = P.new[2,,] %*% v[n,] + c_inspect
v_all[,3] = P.new[3,,] %*% v[n,] + c_GP
# Setting rewards of impossible actions to Inf
for(j in seq(N+1,m*(N+1), by = N+1)){
v_all[j,1] <- Inf
}
for(j in seq(1,m*(N+1), by = N+1)){
v_all[j,2] <- Inf
}
v <- rbind(v, apply(v_all, 1, min))
span <- max(v[n+1,]-v[n,]) - min(v[n+1,]-v[n,])
}
d <- A[max.col(-v_all, "first")]
rownames(v) <- 0:n
return <- list(v=v, d=d, span = span)
return
}
result <- val.iter.algo(epsilon, A, v0, c_inspect, c_GP, c_ED, P.new)
# The d array in result indicates the best possible action in each state.
result