Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
c6fd838d
Commit
c6fd838d
authored
Jan 24, 2013
by
Steven Cordwell
Browse files
refactor functions
parent
c82e6ae4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
51 deletions
+64
-51
mdp.py
mdp.py
+54
-48
test_mdptoolbox.py
test_mdptoolbox.py
+10
-3
No files found.
mdp.py
View file @
c6fd838d
...
...
@@ -419,7 +419,7 @@ def exampleRand(S, A, is_sparse=False, mask=None):
return
(
P
,
R
)
def
getSpan
(
self
,
W
):
def
getSpan
(
W
):
"""Returns the span of W
sp(W) = max W(s) - min W(s)
...
...
@@ -438,6 +438,9 @@ class MDP(object):
if
(
discount
<=
0
)
or
(
discount
>
1
):
raise
ValueError
(
mdperr
[
"discount_rng"
])
else
:
if
discount
==
1
:
print
(
"PyMDPtoolbox WARNING: check conditions of convergence."
\
"With no discount, convergence is not always assumed."
)
self
.
discount
=
discount
elif
not
discount
is
None
:
raise
ValueError
(
"PyMDPtoolbox: the discount must be a positive "
\
...
...
@@ -493,52 +496,6 @@ class MDP(object):
# self.V = Q.max(axis=1)
# self.policy = Q.argmax(axis=1)
def computePpolicyPRpolicy(self):
    """Assemble the transition matrix and reward vector of the current policy.

    For every action, the rows (states) to which ``self.policy`` assigns
    that action are copied from that action's transition matrix and from
    the matching column of the reward matrix.

    Reads
    -----
    self.S, self.A : number of states and number of actions
    self.P : indexed as ``self.P[action][rows, :]``
    self.R : indexed as ``self.R[rows, action]``
    self.policy : compared element-wise against each action index

    Returns
    -------
    (Ppolicy, Rpolicy) : tuple
        Ppolicy is an (S x S) matrix and Rpolicy an (S x 1) matrix; Rpolicy
        is passed through ``sparse`` when ``type(self.R) is sparse``.
    """
    n_states = self.S
    policy_transitions = matrix(zeros((n_states, n_states)))
    policy_rewards = matrix(zeros((n_states, 1)))

    # One pass per action rather than per state.
    for action in range(self.A):
        # Row indices of the states that use this action, flattened to a
        # 1-D vector by .getA1().
        # NOTE(review): .getA1() presumes nonzero() yields a numpy matrix
        # here, i.e. that self.policy is a matrix -- confirm with callers.
        rows = nonzero(self.policy == action)[0].getA1()
        if rows.size == 0:
            # No state uses this action; nothing to copy.
            continue
        policy_transitions[rows, :] = self.P[action][rows, :]
        # The rewards are read straight from self.R; no computePR() call
        # is made here (cf. mdp_computePpolicyPRpolicy.m).
        policy_rewards[rows] = self.R[rows, action]

    # Hand back the rewards in the same (sparse) container type as self.R.
    # NOTE(review): the original author remarks that self.R cannot actually
    # be sparse yet, so this branch is presumably dormant -- confirm.
    if type(self.R) is sparse:
        policy_rewards = sparse(policy_rewards)

    return (policy_transitions, policy_rewards)
def
computePR
(
self
,
P
,
R
):
"""Computes the reward for the system in one state chosing an action
...
...
@@ -824,6 +781,52 @@ class PolicyIteration(MDP):
"evaluation or 1 for iterative evaluation. strings 'matrix' "
\
"and 'iterative' can also be used."
)
def computePpolicyPRpolicy(self):
    """Build the policy-specific transition matrix and reward vector.

    Each state's row is taken from the transition matrix of the action
    that ``self.policy`` selects for it, and its reward from the matching
    entry of ``self.R``.

    Reads
    -----
    self.S, self.A : number of states and number of actions
    self.P : indexed as ``self.P[action][rows, :]``
    self.R : indexed as ``self.R[rows, action]``
    self.policy : compared element-wise against each action index

    Returns
    -------
    (Ppolicy, Rpolicy) : tuple
        Ppolicy is an (S x S) matrix and Rpolicy an (S x 1) matrix; Rpolicy
        is passed through ``sparse`` when ``type(self.R) is sparse``.
    """
    state_count = self.S
    P_for_policy = matrix(zeros((state_count, state_count)))
    R_for_policy = matrix(zeros((state_count, 1)))

    # Iterate over actions so that the states never need an explicit loop.
    for act in range(self.A):
        # States choosing this action, as a flat index vector (.getA1()).
        # NOTE(review): .getA1() presumes nonzero() yields a numpy matrix
        # here, i.e. that self.policy is a matrix -- confirm with callers.
        chosen = nonzero(self.policy == act)[0].getA1()
        if chosen.size == 0:
            # This action is unused by the policy; skip it.
            continue
        P_for_policy[chosen, :] = self.P[act][chosen, :]
        # Rewards come directly from self.R; computePR() is deliberately
        # not called here (cf. mdp_computePpolicyPRpolicy.m).
        R_for_policy[chosen] = self.R[chosen, act]

    # Mirror the container type of self.R when it is the ``sparse`` type.
    # NOTE(review): the original author remarks that self.R cannot actually
    # be sparse yet, so this branch is presumably dormant -- confirm.
    if type(self.R) is sparse:
        R_for_policy = sparse(R_for_policy)

    return (P_for_policy, R_for_policy)
def
evalPolicyIterative
(
self
,
V0
=
0
,
epsilon
=
0.0001
,
max_iter
=
10000
):
"""Policy evaluation using iteration
...
...
@@ -1405,10 +1408,13 @@ class ValueIteration(MDP):
self
.
V
=
matrix
(
zeros
((
self
.
S
,
1
)))
else
:
if
(
not
initial_value
.
shape
in
((
self
.
S
,
),
(
self
.
S
,
1
),
(
1
,
self
.
S
))):
raise
ValueError
(
"The initial value must be a vector of length S"
)
raise
ValueError
(
"
PyMDPtoolbox:
The initial value must be a vector of length S"
)
else
:
self
.
V
=
matrix
(
initial_value
)
if
epsilon
<=
0
:
raise
ValueError
(
"PyMDPtoolbox: epsilon must be greater than 0"
)
if
(
self
.
discount
<
1
):
# compute a bound for the number of iterations and update the
# stored value of self.max_iter
...
...
test_mdptoolbox.py
View file @
c6fd838d
...
...
@@ -225,8 +225,7 @@ def test_ValueIteration_iterate():
assert
(
inst
.
iter
==
26
)
def
test_ValueIteration_exampleForest
():
P
,
R
=
exampleForest
()
a
=
ValueIteration
(
P
,
R
,
0.96
)
a
=
ValueIteration
(
Pf
,
Rf
,
0.96
)
a
.
iterate
()
assert
(
a
.
policy
==
array
([
0
,
0
,
0
])).
all
()
assert
a
.
iter
==
4
...
...
@@ -296,7 +295,15 @@ def test_PolicyIteration_matrix_exampleForest():
assert
(
array
(
a
.
policy
)
==
p
).
all
()
assert
a
.
iter
==
itr
def
test_ValueIterationGS
():
# ValueIterationGS
def test_ValueIterationGS_exampleForest():
    """Check ValueIterationGS on the Pf/Rf problem with discount 0.9.

    After ``iterate()`` the policy must equal ``[0, 0, 0]`` and the solver
    must report 33 iterations.
    """
    # NOTE(review): Pf and Rf are module-level fixtures defined outside this
    # block -- presumably the exampleForest() matrices; confirm.
    expected_policy = matrix('0 0 0')
    expected_iterations = 33

    solver = ValueIterationGS(Pf, Rf, 0.9)
    solver.iterate()

    policy_matches = (array(solver.policy) == expected_policy).all()
    assert policy_matches
    assert solver.iter == expected_iterations
#def test_JacksCarRental():
# S = 21 ** 2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment