Zahra Rajabi / pymdptoolbox · Commits

Commit 77dce9f8
authored Jan 26, 2013 by Steven Cordwell

    class LP is now fixed, it requires cvxopt module to be installed

parent 3eb541ec
Changes 2
mdp.py
@@ -684,13 +684,14 @@ class LP(MDP):
         try:
             from cvxopt import matrix, solvers
             self.linprog = solvers.lp
+            self.cvxmat = matrix
         except ImportError:
             raise ImportError("The python module cvxopt is required to use " \
                 "linear programming functionality.")
         from scipy.sparse import eye as speye
-        MDP.__init__(self, transitions, reward, discount, None)
+        MDP.__init__(self, transitions, reward, discount, None, None)
         # The objective is to resolve : min V / V >= PR + discount*P*V
         # The function linprog of the optimisation Toolbox of Mathworks resolves :
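The comments in this hunk and the next describe the linear-programming view of the discounted MDP. As a restatement in standard form (notation added here, not part of the commit): the optimal value function is the componentwise-smallest V satisfying every Bellman inequality,

    \min_V \; \mathbf{1}^{\top} V
    \quad \text{s.t.} \quad
    V \ge R_a + \gamma P_a V
    \;\Longleftrightarrow\;
    (\gamma P_a - I)\,V \le -R_a
    \qquad \text{for every action } a,

where \gamma is the discount factor, P_a the S x S transition matrix and R_a the expected-reward vector of action a. Stacking the A blocks \gamma P_a - I row-wise gives the (A*S, S) constraint matrix M that the next hunk builds.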
@@ -698,24 +699,34 @@ class LP(MDP):
         # So the objective could be expressed as : min V / (discount*P-I) * V <= - PR
         # To avoid loop on states, the matrix M is structured following actions M(A*S,S)
-        self.f = ones(self.S, 1)
+        self.f = self.cvxmat(ones((self.S, 1)))
         self.M = zeros((self.A * self.S, self.S))
         for aa in range(self.A):
             pos = (aa + 1) * self.S
             self.M[(pos - self.S):pos, :] = discount * self.P[aa] - speye(self.S, self.S)
-        self.M = matrix(self.M)
+        self.M = self.cvxmat(self.M)
 
     def iterate(self):
         """"""
         self.time = time()
 
-        self.V = self.linprog(self.f, self.M, -self.R)
-
-        self.V, self.policy = self.bellmanOperator(self.P, self.R, self.discount, self.V)
+        h = self.cvxmat(self.R.reshape(self.S * self.A, 1, order="F"), tc='d')
+
+        # Using the glpk option will make this behave more like Octave
+        # (Octave uses glpk) and perhaps Matlab. If solver=None (ie using the
+        # default cvxopt solver) then V agrees with the Octave equivalent
+        # only to 10e-8 places.
+        self.V = matrix(self.linprog(self.f, self.M, -h, solver='glpk')['x'])
+
+        self.policy, self.V = self.bellmanOperator()
 
         self.time = time() - self.time
+
+        # store value and policy as tuples
+        self.V = tuple(self.V.getA1().tolist())
+        self.policy = tuple(self.policy.getA1().tolist())
 
 class PolicyIteration(MDP):
     """Resolution of discounted MDP with policy iteration algorithm.
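For reference, a minimal standalone sketch of the same cvxopt call pattern the class uses, assuming cvxopt and numpy are installed. The transition and reward numbers are illustrative toy values (not taken from the commit), and the glpk option is omitted since it additionally requires GLPK:

    # Minimal sketch of the LP solve used by the class above.
    import numpy as np
    from cvxopt import matrix, solvers

    discount = 0.9
    P = np.array([[[0.5, 0.5],          # P[a] is the S x S transition matrix
                   [0.8, 0.2]],         # for action a (here A = 2, S = 2)
                  [[0.0, 1.0],
                   [0.1, 0.9]]])
    R = np.array([[ 5.0, 10.0],         # R[s, a]: expected reward in state s
                  [-1.0,  2.0]])        # when taking action a
    A, S = P.shape[0], P.shape[1]

    # Constraint blocks stacked action by action: (discount*P[a] - I) V <= -R[:, a]
    M = np.vstack([discount * P[a] - np.eye(S) for a in range(A)])
    h = -R.reshape(S * A, 1, order="F")     # column-major, matching the class

    # cvxopt's solvers.lp solves  min c'x  subject to  G x <= h
    sol = solvers.lp(matrix(np.ones((S, 1))), matrix(M), matrix(h))
    V = np.array(sol['x']).flatten()
    print(V)                                # optimal state values of the toy MDP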
test_mdptoolbox.py
@@ -5,8 +5,8 @@ Created on Sun May 27 23:16:57 2012
 @author: -
 """
 
-from mdp import check, checkSquareStochastic, exampleForest, exampleRand, MDP
-from mdp import PolicyIteration, QLearning, RelativeValueIteration
+from mdp import check, checkSquareStochastic, exampleForest, exampleRand, LP
+from mdp import MDP, PolicyIteration, QLearning, RelativeValueIteration
 from mdp import ValueIteration, ValueIterationGS
 from numpy import absolute, array, eye, matrix, zeros
@@ -212,6 +212,16 @@ def test_MDP_P_R_3():
     a = MDP(P, R, 0.9, 0.01, 1)
     assert (absolute(a.R - PR) < SMALLNUM).all()
 
+# LP
+
+def test_LP():
+    a = LP(P, R, 0.9)
+    v = matrix('42.4418604651163 36.0465116279070')
+    p = matrix('1 0')
+    a.iterate()
+    assert (array(a.policy) == p).all()
+    assert (absolute(array(a.V) - v) < SMALLNUM).all()
+
 # PolicyIteration
 
 def test_PolicyIteration_init_policy0():
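A usage sketch of the new LP class, mirroring the test above. The exampleRand helper and its (S, A) call signature are assumed to behave as elsewhere in this test file, and cvxopt (with GLPK) must be installed for iterate() to run:

    # Usage sketch of the LP class added in this commit.
    from mdp import LP, exampleRand

    P, R = exampleRand(10, 3)      # random 10-state, 3-action MDP (assumed signature)
    a = LP(P, R, 0.9)              # discount factor 0.9
    a.iterate()                    # solve the linear program
    print(a.V)                     # optimal values, stored as a tuple
    print(a.policy)                # optimal policy, stored as a tuple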