Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
4798ff01
Commit
4798ff01
authored
Jun 21, 2013
by
Steven Cordwell
Browse files
use mdp rather than importing individual pieces from mdp
parent
a1cf5c8d
Changes
1
Hide whitespace changes
Inline
Side-by-side
test_mdptoolbox.py
View file @
4798ff01
...
...
@@ -18,9 +18,7 @@ from scipy.sparse import eye as speye
from
scipy.sparse
import
csr_matrix
as
sparse
#from scipy.stats.distributions import poisson
from
mdp
import
check
,
checkSquareStochastic
,
exampleForest
,
exampleRand
from
mdp
import
MDP
,
PolicyIteration
,
QLearning
,
RelativeValueIteration
from
mdp
import
ValueIteration
,
ValueIterationGS
import
mdp
STATES
=
10
ACTIONS
=
3
...
...
@@ -32,9 +30,9 @@ R = array([[5, 10], [-1, 2]])
Ps
=
empty
(
2
,
dtype
=
object
)
Ps
[
0
]
=
sparse
([[
0.5
,
0.5
],[
0.8
,
0.2
]])
Ps
[
1
]
=
sparse
([[
0
,
1
],[
0.1
,
0.9
]])
Pf
,
Rf
=
exampleForest
()
Pr
,
Rr
=
exampleRand
(
STATES
,
ACTIONS
)
Prs
,
Rrs
=
exampleRand
(
STATES
,
ACTIONS
,
is_sparse
=
True
)
Pf
,
Rf
=
mdp
.
exampleForest
()
Pr
,
Rr
=
mdp
.
exampleRand
(
STATES
,
ACTIONS
)
Prs
,
Rrs
=
mdp
.
exampleRand
(
STATES
,
ACTIONS
,
is_sparse
=
True
)
# check: square, stochastic and non-negative ndarrays
...
...
@@ -44,14 +42,14 @@ def test_check_square_stochastic_nonnegative_array_1():
for
a
in
range
(
ACTIONS
):
P
[
a
,
:,
:]
=
eye
(
STATES
)
R
[:,
a
]
=
rand
(
STATES
)
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_square_stochastic_nonnegative_array_2
():
P
=
zeros
((
ACTIONS
,
STATES
,
STATES
))
R
=
rand
(
ACTIONS
,
STATES
,
STATES
)
for
a
in
range
(
ACTIONS
):
P
[
a
,
:,
:]
=
eye
(
STATES
)
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
# check: P - square, stochastic and non-negative object arrays
...
...
@@ -60,21 +58,21 @@ def test_check_P_square_stochastic_nonnegative_object_array():
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
range
(
ACTIONS
):
P
[
a
]
=
eye
(
STATES
)
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_P_square_stochastic_nonnegative_object_matrix
():
P
=
empty
(
ACTIONS
,
dtype
=
object
)
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
range
(
ACTIONS
):
P
[
a
]
=
matrix
(
eye
(
STATES
))
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_P_square_stochastic_nonnegative_object_sparse
():
P
=
empty
(
ACTIONS
,
dtype
=
object
)
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
range
(
ACTIONS
):
P
[
a
]
=
speye
(
STATES
,
STATES
).
tocsr
()
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
# check: P - square, stochastic and non-negative lists
...
...
@@ -83,21 +81,21 @@ def test_check_P_square_stochastic_nonnegative_list_array():
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
xrange
(
ACTIONS
):
P
.
append
(
eye
(
STATES
))
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_P_square_stochastic_nonnegative_list_matrix
():
P
=
[]
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
xrange
(
ACTIONS
):
P
.
append
(
matrix
(
eye
(
STATES
)))
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_P_square_stochastic_nonnegative_list_sparse
():
P
=
[]
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
xrange
(
ACTIONS
):
P
.
append
(
speye
(
STATES
,
STATES
).
tocsr
())
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
# check: P - square, stochastic and non-negative dicts
...
...
@@ -106,21 +104,21 @@ def test_check_P_square_stochastic_nonnegative_dict_array():
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
xrange
(
ACTIONS
):
P
[
a
]
=
eye
(
STATES
)
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_P_square_stochastic_nonnegative_dict_matrix
():
P
=
{}
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
xrange
(
ACTIONS
):
P
[
a
]
=
matrix
(
eye
(
STATES
))
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_P_square_stochastic_nonnegative_dict_sparse
():
P
=
{}
R
=
rand
(
STATES
,
ACTIONS
)
for
a
in
xrange
(
ACTIONS
):
P
[
a
]
=
speye
(
STATES
,
STATES
).
tocsr
()
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
# check: R - square stochastic and non-negative sparse
...
...
@@ -129,7 +127,7 @@ def test_check_R_square_stochastic_nonnegative_sparse():
R
=
sparse
(
rand
(
STATES
,
ACTIONS
))
for
a
in
range
(
ACTIONS
):
P
[
a
,
:,
:]
=
eye
(
STATES
)
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
# check: R - square, stochastic and non-negative object arrays
...
...
@@ -139,7 +137,7 @@ def test_check_R_square_stochastic_nonnegative_object_array():
for
a
in
range
(
ACTIONS
):
P
[
a
,
:,
:]
=
eye
(
STATES
)
R
[
a
]
=
rand
(
STATES
,
STATES
)
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_R_square_stochastic_nonnegative_object_matrix
():
P
=
zeros
((
ACTIONS
,
STATES
,
STATES
))
...
...
@@ -147,7 +145,7 @@ def test_check_R_square_stochastic_nonnegative_object_matrix():
for
a
in
range
(
ACTIONS
):
P
[
a
,
:,
:]
=
eye
(
STATES
)
R
[
a
]
=
matrix
(
rand
(
STATES
,
STATES
))
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
def
test_check_R_square_stochastic_nonnegative_object_sparse
():
P
=
zeros
((
ACTIONS
,
STATES
,
STATES
))
...
...
@@ -155,7 +153,7 @@ def test_check_R_square_stochastic_nonnegative_object_sparse():
for
a
in
range
(
ACTIONS
):
P
[
a
,
:,
:]
=
eye
(
STATES
)
R
[
a
]
=
sparse
(
rand
(
STATES
,
STATES
))
assert
(
check
(
P
,
R
)
==
None
)
assert
(
mdp
.
check
(
P
,
R
)
==
None
)
# checkSquareStochastic: square, stochastic and non-negative
...
...
@@ -163,35 +161,35 @@ def test_checkSquareStochastic_square_stochastic_nonnegative_array():
P
=
rand
(
STATES
,
STATES
)
for
s
in
range
(
STATES
):
P
[
s
,
:]
=
P
[
s
,
:]
/
P
[
s
,
:].
sum
()
assert
checkSquareStochastic
(
P
)
==
None
assert
mdp
.
checkSquareStochastic
(
P
)
==
None
def
test_checkSquareStochastic_square_stochastic_nonnegative_matrix
():
P
=
rand
(
STATES
,
STATES
)
for
s
in
range
(
STATES
):
P
[
s
,
:]
=
P
[
s
,
:]
/
P
[
s
,
:].
sum
()
P
=
matrix
(
P
)
assert
checkSquareStochastic
(
P
)
==
None
assert
mdp
.
checkSquareStochastic
(
P
)
==
None
def
test_checkSquareStochastic_square_stochastic_nonnegative_sparse
():
P
=
rand
(
STATES
,
STATES
)
for
s
in
range
(
STATES
):
P
[
s
,
:]
=
P
[
s
,
:]
/
P
[
s
,
:].
sum
()
P
=
sparse
(
P
)
assert
checkSquareStochastic
(
P
)
==
None
assert
mdp
.
checkSquareStochastic
(
P
)
==
None
# checkSquareStochastic: eye
def
test_checkSquareStochastic_eye_array
():
P
=
eye
(
STATES
)
assert
checkSquareStochastic
(
P
)
==
None
assert
mdp
.
checkSquareStochastic
(
P
)
==
None
def
test_checkSquareStochastic_eye_matrix
():
P
=
matrix
(
eye
(
STATES
))
assert
checkSquareStochastic
(
P
)
==
None
assert
mdp
.
checkSquareStochastic
(
P
)
==
None
def
test_checkSquareStochastic_eye_sparse
():
P
=
speye
(
STATES
,
STATES
).
tocsr
()
assert
checkSquareStochastic
(
P
)
==
None
assert
mdp
.
checkSquareStochastic
(
P
)
==
None
# exampleForest
...
...
@@ -209,8 +207,8 @@ def test_exampleForest_R_shape():
[
4
,
2
]])).
all
()
def
test_exampleForest_check
():
P
,
R
=
exampleForest
(
10
,
5
,
3
,
0.2
)
assert
check
(
P
,
R
)
==
None
P
,
R
=
mdp
.
exampleForest
(
10
,
5
,
3
,
0.2
)
assert
mdp
.
check
(
P
,
R
)
==
None
# exampleRand
...
...
@@ -221,7 +219,7 @@ def test_exampleRand_dense_R_shape():
assert
(
Rr
.
shape
==
(
ACTIONS
,
STATES
,
STATES
))
def
test_exampleRand_dense_check
():
assert
check
(
Pr
,
Rr
)
==
None
assert
mdp
.
check
(
Pr
,
Rr
)
==
None
def
test_exampleRand_sparse_P_shape
():
assert
(
len
(
Prs
)
==
ACTIONS
)
...
...
@@ -230,7 +228,7 @@ def test_exampleRand_sparse_R_shape():
assert
(
len
(
Rrs
)
==
ACTIONS
)
def
test_exampleRand_sparse_check
():
assert
check
(
Prs
,
Rrs
)
==
None
assert
mdp
.
check
(
Prs
,
Rrs
)
==
None
# MDP
...
...
@@ -243,7 +241,7 @@ def test_MDP_P_R_1():
R1
.
append
(
matrix
(
'5; -1'
))
R1
.
append
(
matrix
(
'10; 2'
))
R1
=
tuple
(
R1
)
a
=
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
a
=
mdp
.
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
assert
type
(
a
.
P
)
==
type
(
P1
)
assert
type
(
a
.
R
)
==
type
(
R1
)
for
kk
in
range
(
2
):
...
...
@@ -260,7 +258,7 @@ def test_MDP_P_R_2():
R1
.
append
(
matrix
(
'7.5; -0.4'
))
R1
.
append
(
matrix
(
'2; 3.9'
))
R1
=
tuple
(
R1
)
a
=
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
a
=
mdp
.
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
assert
type
(
a
.
P
)
==
type
(
P1
)
assert
type
(
a
.
R
)
==
type
(
R1
)
for
kk
in
range
(
2
):
...
...
@@ -274,7 +272,7 @@ def test_MDP_P_R_3():
PR
.
append
(
matrix
(
'0.12591304; 0.1871'
))
PR
.
append
(
matrix
(
'0.20935652;0.2898'
))
PR
=
tuple
(
PR
)
a
=
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
a
=
mdp
.
MDP
(
P
,
R
,
0.9
,
0.01
,
1
)
for
kk
in
range
(
2
):
assert
(
absolute
(
a
.
R
[
kk
]
-
PR
[
kk
])
<
SMALLNUM
).
all
()
...
...
@@ -291,17 +289,17 @@ def test_MDP_P_R_3():
# PolicyIteration
def
test_PolicyIteration_init_policy0
():
a
=
PolicyIteration
(
P
,
R
,
0.9
)
a
=
mdp
.
PolicyIteration
(
P
,
R
,
0.9
)
p
=
matrix
(
'1; 1'
)
assert
(
a
.
policy
==
p
).
all
()
def
test_PolicyIteration_init_policy0_exampleForest
():
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
)
p
=
matrix
(
'0; 1; 0'
)
assert
(
a
.
policy
==
p
).
all
()
def
test_PolicyIteration_computePpolicyPRpolicy_exampleForest
():
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
)
P1
=
matrix
(
'0.1 0.9 0; 1 0 0; 0.1 0 0.9'
)
R1
=
matrix
(
'0; 1; 4'
)
Ppolicy
,
Rpolicy
=
a
.
_computePpolicyPRpolicy
()
...
...
@@ -312,7 +310,7 @@ def test_PolicyIteration_evalPolicyIterative_exampleForest():
v0
=
matrix
(
'0; 0; 0'
)
v1
=
matrix
(
'4.47504640074458; 5.02753258879703; 23.17234211944304'
)
p
=
matrix
(
'0; 1; 0'
)
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
)
assert
(
absolute
(
a
.
V
-
v0
)
<
SMALLNUM
).
all
()
a
.
_evalPolicyIterative
()
assert
(
absolute
(
a
.
V
-
v1
)
<
SMALLNUM
).
all
()
...
...
@@ -321,14 +319,14 @@ def test_PolicyIteration_evalPolicyIterative_exampleForest():
def
test_PolicyIteration_evalPolicyIterative_bellmanOperator_exampleForest
():
v
=
matrix
(
'4.47504640074458; 5.02753258879703; 23.17234211944304'
)
p
=
matrix
(
'0; 0; 0'
)
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
.
_evalPolicyIterative
()
policy
,
value
=
a
.
_bellmanOperator
()
assert
(
policy
==
p
).
all
()
assert
(
absolute
(
a
.
V
-
v
)
<
SMALLNUM
).
all
()
def
test_PolicyIteration_iterative_exampleForest
():
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
,
eval_type
=
1
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
,
eval_type
=
1
)
v
=
matrix
(
'26.2439058351861 29.4839058351861 33.4839058351861'
)
p
=
matrix
(
'0 0 0'
)
itr
=
2
...
...
@@ -339,12 +337,12 @@ def test_PolicyIteration_iterative_exampleForest():
def
test_PolicyIteration_evalPolicyMatrix_exampleForest
():
v_pol
=
matrix
(
'4.47513812154696; 5.02762430939227; 23.17243384704857'
)
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
.
_evalPolicyMatrix
()
assert
(
absolute
(
a
.
V
-
v_pol
)
<
SMALLNUM
).
all
()
def
test_PolicyIteration_matrix_exampleForest
():
a
=
PolicyIteration
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
PolicyIteration
(
Pf
,
Rf
,
0.9
)
v
=
matrix
(
'26.2440000000000 29.4840000000000 33.4840000000000'
)
p
=
matrix
(
'0 0 0'
)
itr
=
2
...
...
@@ -357,7 +355,7 @@ def test_PolicyIteration_matrix_exampleForest():
def
test_QLearning
():
randseed
(
0
)
a
=
QLearning
(
P
,
R
,
0.9
)
a
=
mdp
.
QLearning
(
P
,
R
,
0.9
)
q
=
matrix
(
'36.63245946346517 42.24434307022128; '
\
'35.96582807367007 32.70456417451635'
)
v
=
matrix
(
'42.24434307022128 35.96582807367007'
)
...
...
@@ -368,7 +366,7 @@ def test_QLearning():
assert
(
array
(
a
.
policy
)
==
p
).
all
()
def
test_QLearning_exampleForest
():
a
=
QLearning
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
QLearning
(
Pf
,
Rf
,
0.9
)
#q = matrix('26.1841860892231 18.6273657021260; ' \
# '29.5880960371007 18.5901207622881; '\
# '33.3526406657418 25.2621054631519')
...
...
@@ -382,7 +380,7 @@ def test_QLearning_exampleForest():
# RelativeValueIteration
def
test_RelativeValueIteration_dense
():
a
=
RelativeValueIteration
(
P
,
R
)
a
=
mdp
.
RelativeValueIteration
(
P
,
R
)
p
=
matrix
(
'1 0'
)
ar
=
3.88523524641183
itr
=
29
...
...
@@ -392,7 +390,7 @@ def test_RelativeValueIteration_dense():
assert
absolute
(
a
.
average_reward
-
ar
)
<
SMALLNUM
def
test_RelativeValueIteration_sparse
():
a
=
RelativeValueIteration
(
Ps
,
R
)
a
=
mdp
.
RelativeValueIteration
(
Ps
,
R
)
p
=
matrix
(
'1 0'
)
ar
=
3.88523524641183
itr
=
29
...
...
@@ -402,7 +400,7 @@ def test_RelativeValueIteration_sparse():
assert
absolute
(
a
.
average_reward
-
ar
)
<
SMALLNUM
def
test_RelativeValueIteration_exampleForest
():
a
=
RelativeValueIteration
(
Pf
,
Rf
)
a
=
mdp
.
RelativeValueIteration
(
Pf
,
Rf
)
itr
=
4
p
=
matrix
(
'0 0 0'
)
#v = matrix('-4.360000000000000 -0.760000000000000 3.240000000000000')
...
...
@@ -416,18 +414,18 @@ def test_RelativeValueIteration_exampleForest():
# ValueIteration
def
test_ValueIteration_boundIter
():
inst
=
ValueIteration
(
P
,
R
,
0.9
,
0.01
)
inst
=
mdp
.
ValueIteration
(
P
,
R
,
0.9
,
0.01
)
assert
(
inst
.
max_iter
==
28
)
def
test_ValueIteration_iterate
():
inst
=
ValueIteration
(
P
,
R
,
0.9
,
0.01
)
inst
=
mdp
.
ValueIteration
(
P
,
R
,
0.9
,
0.01
)
inst
.
iterate
()
assert
(
inst
.
V
==
(
40.048625392716822
,
33.65371175967546
))
assert
(
inst
.
policy
==
(
1
,
0
))
assert
(
inst
.
iter
==
26
)
def
test_ValueIteration_exampleForest
():
a
=
ValueIteration
(
Pf
,
Rf
,
0.96
)
a
=
mdp
.
ValueIteration
(
Pf
,
Rf
,
0.96
)
a
.
iterate
()
assert
(
a
.
policy
==
array
([
0
,
0
,
0
])).
all
()
assert
a
.
iter
==
4
...
...
@@ -435,12 +433,12 @@ def test_ValueIteration_exampleForest():
# ValueIterationGS
def
test_ValueIterationGS_boundIter_exampleForest
():
a
=
ValueIterationGS
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
ValueIterationGS
(
Pf
,
Rf
,
0.9
)
itr
=
39
assert
(
a
.
max_iter
==
itr
)
def
test_ValueIterationGS_exampleForest
():
a
=
ValueIterationGS
(
Pf
,
Rf
,
0.9
)
a
=
mdp
.
ValueIterationGS
(
Pf
,
Rf
,
0.9
)
p
=
matrix
(
'0 0 0'
)
v
=
matrix
(
'25.5833879767579 28.8306546355469 32.8306546355469'
)
itr
=
33
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment