Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
ad4c8886
Commit
ad4c8886
authored
Sep 10, 2013
by
Steven Cordwell
Browse files
Merge branch 'assertstmts'
parents
d4a32136
6630e624
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/mdptoolbox/example.py
View file @
ad4c8886
...
...
@@ -79,8 +79,8 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
Examples
--------
>>> import mdp
>>> P, R = mdp.exampleForest()
>>> import mdptoolbox.example
>>> P, R = mdptoolbox.example.forest()
>>> P
array([[[ 0.1, 0.9, 0. ],
[ 0.1, 0. , 0.9],
...
...
@@ -164,8 +164,8 @@ def rand(S, A, is_sparse=False, mask=None):
Examples
--------
>>> import mdp
>>> P, R = mdp.exampleRand(5, 3)
>>> import mdptoolbox.example
>>> P, R = mdptoolbox.example.rand(5, 3)
"""
# making sure the states and actions are more than one
...
...
src/mdptoolbox/mdp.py
View file @
ad4c8886
...
...
@@ -156,37 +156,22 @@ class MDP(object):
# if the discount is None then the algorithm is assumed to not use it
# in its computations
if
type
(
discount
)
in
(
int
,
float
):
if
(
discount
<=
0
)
or
(
discount
>
1
):
raise
ValueError
(
"Discount rate must be in ]0; 1]"
)
else
:
if
discount
==
1
:
print
(
"PyMDPtoolbox WARNING: check conditions of "
"convergence. With no discount, convergence is not "
"always assumed."
)
self
.
discount
=
discount
elif
discount
is
not
None
:
raise
ValueError
(
"PyMDPtoolbox: the discount must be a positive "
"real number less than or equal to one."
)
if
discount
is
not
None
:
self
.
discount
=
float
(
discount
)
assert
0.0
<
self
.
discount
<=
1.0
,
"Discount rate must be in ]0; 1]"
if
self
.
discount
==
1
:
print
(
"PyMDPtoolbox WARNING: check conditions of convergence. "
"With no discount, convergence is not always assumed."
)
# if the max_iter is None then the algorithm is assumed to not use it
# in its computations
if
type
(
max_iter
)
in
(
int
,
float
):
if
max_iter
<=
0
:
raise
ValueError
(
"The maximum number of iterations must be "
"greater than 0"
)
else
:
self
.
max_iter
=
max_iter
elif
max_iter
is
not
None
:
raise
ValueError
(
"PyMDPtoolbox: max_iter must be a positive real "
"number greater than zero."
)
if
max_iter
is
not
None
:
self
.
max_iter
=
int
(
max_iter
)
assert
self
.
max_iter
>
0
,
"The maximum number of iterations "
\
"must be greater than 0."
# check that epsilon is something sane
if
type
(
epsilon
)
in
(
int
,
float
):
if
epsilon
<=
0
:
raise
ValueError
(
"PyMDPtoolbox: epsilon must be greater than "
"0."
)
elif
epsilon
is
not
None
:
raise
ValueError
(
"PyMDPtoolbox: epsilon must be a positive real "
"number greater than zero."
)
if
epsilon
is
not
None
:
self
.
epsilon
=
float
(
epsilon
)
assert
self
.
epsilon
>
0
,
"Epsilon must be greater than 0."
# we run a check on P and R to make sure they are describing an MDP. If
# an exception isn't raised then they are assumed to be correct.
check
(
transitions
,
reward
)
...
...
@@ -226,10 +211,10 @@ class MDP(object):
else
:
# make sure the user supplied V is of the right shape
try
:
if
V
.
shape
not
in
((
self
.
S
,),
(
1
,
self
.
S
))
:
raise
ValueError
(
"bellman: V is not the right shape
."
)
assert
V
.
shape
in
((
self
.
S
,),
(
1
,
self
.
S
))
,
"V is not the "
\
"right shape (Bellman operator)
."
except
AttributeError
:
raise
TypeError
(
"
bellman:
V must be a numpy array or matrix."
)
raise
TypeError
(
"V must be a numpy array or matrix."
)
# Looping through each action, the Q-value matrix is calculated.
# P and V can be any object that supports indexing, so it is important
# that you know they define a valid MDP before calling the
...
...
@@ -274,34 +259,20 @@ class MDP(object):
self
.
S
=
P
[
0
].
shape
[
0
]
except
AttributeError
:
self
.
S
=
P
[
0
].
shape
[
0
]
except
:
raise
# convert Ps to matrices
self
.
P
=
[]
for
aa
in
xrange
(
self
.
A
):
self
.
P
.
append
(
P
[
aa
])
self
.
P
=
tuple
(
self
.
P
)
# convert P to a tuple of numpy arrays
self
.
P
=
tuple
([
P
[
aa
]
for
aa
in
range
(
self
.
A
)])
# Set self.R as a tuple of length A, with each element storing a 1×S
# vector.
try
:
if
R
.
ndim
==
2
:
self
.
R
=
[]
for
aa
in
xrange
(
self
.
A
):
self
.
R
.
append
(
array
(
R
[:,
aa
]).
reshape
(
self
.
S
))
self
.
R
=
tuple
([
array
(
R
[:,
aa
]).
reshape
(
self
.
S
)
for
aa
in
range
(
self
.
A
)])
else
:
raise
AttributeError
self
.
R
=
tuple
([
multiply
(
P
[
aa
],
R
[
aa
]).
sum
(
1
).
reshape
(
self
.
S
)
for
aa
in
xrange
(
self
.
A
)])
except
AttributeError
:
self
.
R
=
[]
for
aa
in
xrange
(
self
.
A
):
try
:
self
.
R
.
append
(
P
[
aa
].
multiply
(
R
[
aa
]).
sum
(
1
).
reshape
(
self
.
S
))
except
AttributeError
:
self
.
R
.
append
(
multiply
(
P
[
aa
],
R
[
aa
]).
sum
(
1
).
reshape
(
self
.
S
))
except
:
raise
except
:
raise
self
.
R
=
tuple
(
self
.
R
)
self
.
R
=
tuple
([
multiply
(
P
[
aa
],
R
[
aa
]).
sum
(
1
).
reshape
(
self
.
S
)
for
aa
in
xrange
(
self
.
A
)])
def
_iterate
(
self
):
# Raise error because child classes should implement this function.
...
...
@@ -371,10 +342,8 @@ class FiniteHorizon(MDP):
def
__init__
(
self
,
transitions
,
reward
,
discount
,
N
,
h
=
None
):
# Initialise a finite horizon MDP.
if
N
<
1
:
raise
ValueError
(
'PyMDPtoolbox: N must be greater than 0'
)
else
:
self
.
N
=
N
self
.
N
=
int
(
N
)
assert
self
.
N
>
0
,
'PyMDPtoolbox: N must be greater than 0.'
# Initialise the base class
MDP
.
__init__
(
self
,
transitions
,
reward
,
discount
,
None
,
None
)
# remove the iteration counter, it is not meaningful for backwards
...
...
src/mdptoolbox/utils.py
View file @
ad4c8886
...
...
@@ -7,6 +7,8 @@ Created on Sun Aug 18 14:30:09 2013
from
numpy
import
absolute
,
ones
SMALLNUM
=
10e-12
# These need to be fixed so that we use classes derived from Error.
mdperr
=
{
"mat_nonneg"
:
...
...
@@ -250,7 +252,7 @@ def checkSquareStochastic(Z):
# check that the matrix is square, and that each row sums to one
if
s1
!=
s2
:
raise
InvalidMDPError
(
mdperr
[
"mat_square"
])
elif
(
absolute
(
Z
.
sum
(
axis
=
1
)
-
ones
(
s2
))).
max
()
>
10e-12
:
elif
(
absolute
(
Z
.
sum
(
axis
=
1
)
-
ones
(
s2
))).
max
()
>
SMALLNUM
:
raise
InvalidMDPError
(
mdperr
[
"mat_stoch"
])
# make sure that there are no values less than zero
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment