forked from iadine/PPR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmdp_example_reserve.r
113 lines (99 loc) · 3.41 KB
/
mdp_example_reserve.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# function [P, R] = mdp_example_reserve (M, pj)
mdp_example_reserve <- function(M, pj) {
# mdp_example_reserve Generate a Markov Decision Process example based on
# a simple reserve design problem
# (see the related documentation for more detail)
# Arguments -------------------------------------------------------------
# M(JxI) = species distribution across sites
# J = number of sites (> 0), optional (default 5)
# I = number of species (>0), optional (default 7)
# pj = probability of development occurence, in ]0, 1[, optional (default 0.1)
# Evaluation -------------------------------------------------------------
# P(SxSxA) = transition probability matrix
# R(SxA) = reward matrix
# M(JxI) = random species distribution across sites
if ( nargs() >= 1 & dim(M)[1] <= 1 & dim(M)[2] <= 1 ) {
print('----------------------------------------------------------')
print('MDP Toolbox ERROR: M is a JxI matrix with Number of sites J and species I must be greater than 0')
print('----------------------------------------------------------')
} else if ( nargs() >= 2 & ( pj < 0 | pj > 1 ) ) {
print('-----------------------------------------------------------')
print('MDP Toolbox ERROR: Probability pj must be in [0; 1]')
print('-----------------------------------------------------------')
} else {
# initialization of optional arguments
if (nargs() < 2) pj <- 0.1
if (nargs() < 1) {
J <- 3
I <- 3
M <- round(matrix(nrow=J, ncol=I, data=runif(I*J,0,1)))
}
J <- nrow(M)
I <- ncol(M)
# Definition of states
# each site, 0 is available; 1 is reserved; 2 is developed
S <- 3^J
# action space
A <- J
# There are J actions corresponding to the selection of a site for
# reservation. A site can only be reserved if it is available.
# By convention we will use a ternary base where state #0 is the state
# that corresponds to [0,0, ..,0] all sites are available. State #1 is
# [1,0,0,...,0]; state 2 is [2,0,0 ..,0] and state 3 is [0,1,0, .. 0] and so forth.
# for example
# site = [0,0,1,2] means the first 2 sites are available (site[1:2]=0), site 3 is
# reserved (site[3]=1) and site 4 is developped (site[4]=2).
# Build P(AxSxS)
# complexity is in SxAx2^navail; with 2^navail<=S
P <- array(0, c(S,S,A))
for (s1 in 1:S) {
site1 <- getSite(s1-1, J)
for (a in 1:A) {
site2 <- site1
if (site1[a] == 0) {
site2[a] <- 1
}
availSite <- which(site2 == 0)
if (length(availSite) > 0) {
navail <- length(availSite)
siten <- rep(1,2^navail) %*% t(site2)
aux <- numeric(navail)
for (k in 1:2^navail) {
siten[k,availSite] <- aux*2
ndev <- sum(abs(site2[availSite]-aux))
s2 <- getState(siten[k,])+1
P[s1,s2,a] <- pj^ndev*(1-pj)^(navail-ndev)
aux <- dec2binvec(binvec2dec(aux)+1, navail)
}
} else {
s2 <- getState(site2)+1
P[s1,s2,a] <- 1
}
}
}
# Build R
R <- array(0, c(S,A))
for (s1 in 1:S) {
site1 <- getSite(s1-1,J)
for (a in 1:J) {
if (site1[a] != 2) {
targetSp <- M[a,]
reservedSites <- which(site1==1)
if (length(reservedSites) > 0) {
X <- M[reservedSites,]
if (is.vector(X)) {
reservedSp <- pmax(M[reservedSites,])
}
if (is.array(X)) {
reservedSp <- apply((M[reservedSites,]),2,max)
}
R[s1,a] <- sum(pmax(0,targetSp-reservedSp))
} else {
R[s1,a] <- sum(pmax(0,targetSp))
}
}
}
}
return(list("P"=P, "R"=R))
}
}