Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
ccab1f84
Commit
ccab1f84
authored
Feb 09, 2013
by
Steven Cordwell
Browse files
merge using tuples to store P and R
parents
80a0b065
0be11eea
Changes
2
Hide whitespace changes
Inline
Side-by-side
mdp.py
View file @
ccab1f84
...
...
@@ -59,7 +59,7 @@ source code use ``mdp.ValueIteration??<ENTER>``.
Acknowledgments
---------------
This module is modified from the MDPtoolbox (c) 2009 INRA available at
`http://www.inra.fr/mia/T/MDPtoolbox/`_.
http://www.inra.fr/mia/T/MDPtoolbox/.
"""
...
...
@@ -96,7 +96,7 @@ from math import ceil, log, sqrt
from
random
import
randint
,
random
from
time
import
time
from
numpy
import
absolute
,
array
,
empty
,
diag
,
matrix
,
mean
,
mod
,
multiply
from
numpy
import
absolute
,
array
,
diag
,
empty
,
matrix
,
mean
,
mod
,
multiply
from
numpy
import
ndarray
,
ones
,
zeros
from
numpy.random
import
rand
from
scipy.sparse
import
csr_matrix
as
sparse
...
...
@@ -542,9 +542,9 @@ def exampleRand(S, A, is_sparse=False, mask=None):
# generate the transition and reward matrices based on S, A and mask
if
is_sparse
:
# definition of transition matrix : square stochastic matrix
P
=
zeros
((
A
,
)
,
dtype
=
object
)
P
=
empty
(
A
,
dtype
=
object
)
# definition of reward matrix (values between -1 and +1)
R
=
zeros
((
A
,
)
,
dtype
=
object
)
R
=
empty
(
A
,
dtype
=
object
)
for
a
in
range
(
A
):
if
mask
.
ndim
==
3
:
PP
=
mask
[
a
,
:,
:]
*
rand
(
S
,
S
)
...
...
@@ -719,7 +719,7 @@ class MDP(object):
Q
=
matrix
(
empty
((
self
.
S
,
self
.
A
)))
for
aa
in
range
(
self
.
A
):
Q
[:,
aa
]
=
self
.
R
[
:,
aa
]
+
(
self
.
discount
*
self
.
P
[
aa
]
*
V
)
Q
[:,
aa
]
=
self
.
R
[
aa
]
+
(
self
.
discount
*
self
.
P
[
aa
]
*
V
)
# Which way is better?
# 1. Return, (policy, value)
...
...
@@ -749,35 +749,39 @@ class MDP(object):
# We assume that P and R define an MDP, i.e. the assumption is that
# check(P, R) has already been run and doesn't fail.
#
# Make P be an object array with (S, S) shaped array elements. Save it
# as a matrix.
if
P
.
dtype
==
object
:
self
.
P
=
P
self
.
A
=
self
.
P
.
shape
[
0
]
self
.
S
=
self
.
P
[
0
]
.
shape
[
0
]
else
:
# convert to an object array
self
.
A
=
P
.
shape
[
0
]
self
.
S
=
P
.
shape
[
1
]
self
.
P
=
zeros
(
self
.
A
,
dtype
=
object
)
for
aa
in
range
(
self
.
A
)
:
self
.
P
[
aa
]
=
matrix
(
P
[
aa
,
:,
:])
# Make R have the shape (S, A) and save it as a matrix
if
R
.
dtype
==
object
:
# R is object shaped (A,) with each element shaped (S, S)
self
.
R
=
matrix
(
zeros
((
self
.
S
,
self
.
A
))
)
for
aa
in
rang
e
(
self
.
A
):
self
.
R
[:,
aa
]
=
(
multiply
(
P
[
aa
],
R
[
aa
]).
sum
(
1
).
reshape
(
self
.
S
,
1
))
else
:
# Set self.P as a tuple of length A, with each element storing an S×S
# matrix.
self
.
A
=
len
(
P
)
try
:
if
P
.
ndim
==
3
:
self
.
S
=
P
.
shape
[
1
]
else
:
self
.
S
=
P
[
0
]
.
shape
[
0
]
except
AttributeError
:
self
.
S
=
P
[
0
].
shape
[
0
]
except
:
raise
# convert Ps to matrices
self
.
P
=
[]
for
aa
in
xrange
(
self
.
A
):
self
.
P
.
append
(
P
[
aa
]
)
self
.
P
=
tupl
e
(
self
.
P
)
# Set self.R as a tuple of length A, with each element storing an S×1
# vector array.
try
:
if
R
.
ndim
==
2
:
# R already has shape (S, A)
self
.
R
=
matrix
(
R
)
self
.
R
=
[]
for
aa
in
xrange
(
self
.
A
):
self
.
R
.
append
(
R
[:,
aa
].
reshape
(
self
.
S
,
1
))
else
:
# R has shape (A, S, S)
self
.
R
=
matrix
(
zeros
((
self
.
S
,
self
.
A
)))
for
aa
in
range
(
self
.
A
):
self
.
R
[:,
aa
]
=
(
multiply
(
P
[
aa
],
R
[
aa
,
:,
:]).
sum
(
1
).
reshape
(
self
.
S
,
1
))
raise
AttributeError
except
AttributeError
:
self
.
R
=
[]
for
aa
in
xrange
(
self
.
A
):
self
.
R
.
append
(
multiply
(
P
[
aa
],
R
[
aa
]).
sum
(
1
).
reshape
(
self
.
S
,
1
))
except
:
raise
self
.
R
=
tuple
(
self
.
R
)
def
iterate
(
self
):
"""Raise error because child classes should implement this function."""
...
...
@@ -1086,7 +1090,7 @@ class PolicyIteration(MDP):
#PR = self._computePR() # an apparently unneeded line, and
# perhaps harmful in this implementation c.f.
# mdp_computePpolicyPRpolicy.m
Rpolicy
[
ind
]
=
self
.
R
[
ind
,
aa
]
Rpolicy
[
ind
]
=
self
.
R
[
aa
][
ind
]
# self.R cannot be sparse with the code in its current condition, but
# it should be possible in the future. Also, if R is so big that its
...
...
@@ -1826,15 +1830,14 @@ class ValueIteration(MDP):
h
=
zeros
(
self
.
S
)
for
ss
in
range
(
self
.
S
):
PP
=
matrix
(
zeros
((
self
.
S
,
self
.
A
))
)
PP
=
zeros
((
self
.
S
,
self
.
A
))
for
aa
in
range
(
self
.
A
):
try
:
PP
[:,
aa
]
=
self
.
P
[
aa
][:,
ss
]
except
ValueError
:
try
:
PP
[:,
aa
]
=
self
.
P
[
aa
][:,
ss
].
todense
()
except
:
raise
PP
[:,
aa
]
=
self
.
P
[
aa
][:,
ss
].
todense
()
except
:
raise
# the function "min()" without any arguments finds the
# minimum of the entire array.
h
[
ss
]
=
PP
.
min
()
...
...
@@ -1955,7 +1958,7 @@ class ValueIterationGS(ValueIteration):
for
s
in
range
(
self
.
S
):
Q
=
[]
for
a
in
range
(
self
.
A
):
Q
.
append
(
float
(
self
.
R
[
s
,
a
]
+
Q
.
append
(
float
(
self
.
R
[
a
][
s
]
+
self
.
discount
*
self
.
P
[
a
][
s
,
:]
*
self
.
V
))
self
.
V
[
s
]
=
max
(
Q
)
...
...
@@ -1981,7 +1984,7 @@ class ValueIterationGS(ValueIteration):
for
s
in
range
(
self
.
S
):
Q
=
zeros
(
self
.
A
)
for
a
in
range
(
self
.
A
):
Q
[
a
]
=
self
.
R
[
s
,
a
]
+
self
.
P
[
a
][
s
,:]
*
self
.
discount
*
self
.
V
Q
[
a
]
=
self
.
R
[
a
][
s
]
+
self
.
P
[
a
][
s
,:]
*
self
.
discount
*
self
.
V
self
.
V
[
s
]
=
Q
.
max
()
self
.
policy
.
append
(
int
(
Q
.
argmax
()))
...
...
test_mdptoolbox.py
View file @
ccab1f84
...
...
@@ -235,38 +235,48 @@ def test_exampleRand_sparse_check():
# MDP
def
test_MDP_P_R_1
():
P1
=
zeros
((
2
,
),
dtype
=
object
)
P1
[
0
]
=
matrix
(
'0.5 0.5; 0.8 0.2'
)
P1
[
1
]
=
matrix
(
'0 1; 0.1 0.9'
)
R1
=
matrix
(
'5 10; -1 2'
)
P1
=
[]
P1
.
append
(
matrix
(
'0.5 0.5; 0.8 0.2'
))
P1
.
append
(
matrix
(
'0 1; 0.1 0.9'
))
P1
=
tuple
(
P1
)
R1
=
[]
R1
.
append
(
matrix
(
'5; -1'
))
R1
.
append
(
matrix
(
'10; 2'
))
R1
=
tuple
(
R1
)
a
=
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
assert
a
.
P
.
dtype
==
P1
.
dtype
assert
a
.
R
.
dtype
==
R1
.
dtype
assert
type
(
a
.
P
)
==
type
(
P1
)
assert
type
(
a
.
R
)
==
type
(
R1
)
for
kk
in
range
(
2
):
assert
(
a
.
P
[
kk
]
==
P1
[
kk
]).
all
()
assert
(
a
.
R
==
R1
).
all
()
assert
(
a
.
R
[
kk
]
==
R1
[
kk
]
).
all
()
def
test_MDP_P_R_2
():
R
=
array
([[[
5
,
10
],
[
-
1
,
2
]],
[[
1
,
2
],
[
3
,
4
]]])
P1
=
empty
(
2
,
dtype
=
object
)
P1
[
0
]
=
matrix
(
'0.5 0.5; 0.8 0.2'
)
P1
[
1
]
=
matrix
(
'0 1; 0.1 0.9'
)
R1
=
matrix
(
'7.5 2; -0.4 3.9'
)
P1
=
[]
P1
.
append
(
matrix
(
'0.5 0.5; 0.8 0.2'
))
P1
.
append
(
matrix
(
'0 1; 0.1 0.9'
))
P1
=
tuple
(
P1
)
R1
=
[]
R1
.
append
(
matrix
(
'7.5; -0.4'
))
R1
.
append
(
matrix
(
'2; 3.9'
))
R1
=
tuple
(
R1
)
a
=
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
assert
type
(
a
.
P
)
==
type
(
P1
)
assert
type
(
a
.
R
)
==
type
(
R1
)
assert
a
.
P
.
dtype
==
P1
.
dtype
assert
a
.
R
.
dtype
==
R1
.
dtype
for
kk
in
range
(
2
):
assert
(
a
.
P
[
kk
]
==
P1
[
kk
]).
all
()
assert
(
absolute
(
a
.
R
-
R1
)
<
SMALLNUM
).
all
()
assert
(
absolute
(
a
.
R
[
kk
]
-
R1
[
kk
]
)
<
SMALLNUM
).
all
()
def
test_MDP_P_R_3
():
P
=
array
([[[
0.6116
,
0.3884
],[
0
,
1
]],[[
0.6674
,
0.3326
],[
0
,
1
]]])
R
=
array
([[[
-
0.2433
,
0.7073
],[
0
,
0.1871
]],[[
-
0.0069
,
0.6433
],[
0
,
0.2898
]]])
PR
=
matrix
(
'0.12591304 0.20935652; 0.1871 0.2898'
)
PR
=
[]
PR
.
append
(
matrix
(
'0.12591304; 0.1871'
))
PR
.
append
(
matrix
(
'0.20935652;0.2898'
))
PR
=
tuple
(
PR
)
a
=
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
assert
(
absolute
(
a
.
R
-
PR
)
<
SMALLNUM
).
all
()
for
kk
in
range
(
2
):
assert
(
absolute
(
a
.
R
[
kk
]
-
PR
[
kk
])
<
SMALLNUM
).
all
()
# LP
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment