# PPR_nonStationay_example.r
#########################################################################
# RESERVE EXAMPLE
#########################################################################
# Driver script for the non-stationary PPR example:
#   1. specify the problem (random data, or the toy problem file),
#   2. build the transition/reward arrays of the MDP,
#   3. solve the finite-horizon MDP,
#   4. explore the resulting policy.

# NOTE(review): this wipes every object in the calling environment. It is kept
# so the script behaves as before, but consider running the script in a fresh
# R session instead of relying on it.
rm(list = ls()) # remove existing variables

library(MDPtoolbox)
library(graphics)

# Project-local helpers; paths are relative to the current working directory.
source("mdp_finite_horizon_nonStationary.r")
source("mdp_example_PPR_non_stationary.r")
source("explore_solution_PPR.r")
source("dec2binvec.r")
source("getSite.r")
source("binvec2dec.r")
source("getState.r")

# TRUE: load the problem from 'PPR_toyproblem.r'; FALSE: generate random data.
toyPB <- TRUE

# CAREFUL when using getState(): the returned state id starts at 0, so add 1
# before using it as a row index into `policy`:
# > x= c(2,1,0)
# > getState(x)
# [1] 5
# > policy[6,2]
# [1] 3

if (!toyPB) {
  ## Specification of the non stationary PPR problem
  # How many sites and time steps
  init_site <- 3
  time_step <- 3 # last step is time_step+1

  # M is the time dependent benefit matrix Site x time_step
  # random generation or provide data
  M <- round(matrix(
    runif(init_site * time_step, 1, 5),
    nrow = init_site,
    ncol = time_step
  ))

  # term is a vector representing the terminal benefit (reward) at each site
  # (time_step+1)
  term <- round(matrix(runif(init_site, 1, 5), nrow = init_site, ncol = 1))

  # Pj is the time dependent matrix representing the probability of a site
  # being converted at every time step (drawn in [0, 0.4], kept to 2 decimals)
  Pj <- round(
    array(
      runif(init_site * time_step, min = 0, max = 0.4),
      c(init_site, time_step)
    ) * 100
  ) / 100
} else {
  # NOTE(review): the code below reads M, term, Pj, init_site and time_step,
  # so the toy problem file is presumably expected to define all of them --
  # verify against 'PPR_toyproblem.r'.
  source("PPR_toyproblem.r")
}

## Build the MDP
# Generate the transition and reward matrix
PR <- mdp_example_PPR_non_stationary(M, term, Pj)
P <- PR$P # Probability transitions P(SxSxAxT)
R <- PR$R # Reward R(SxAxT)
h <- PR$RT # terminal Reward R(S)

## Solve the MDP
# Solve the PPR problem
# NOTE(review): the third argument (1) is presumably the discount factor and
# the fourth the number of decision steps -- confirm against
# 'mdp_finite_horizon_nonStationary.r'.
results <- mdp_finite_horizon_nonStationary(P, R, 1, time_step, h)
V <- results$V
policy <- results$policy
print(policy)
print(V)

## Explore solution
# Start the exploration from the all-zero site vector.
sim <- explore_solution_PPR(numeric(init_site), policy, M, P, R, h)
sim$Treward
sim$Tsites