Zahra Rajabi / pymdptoolbox / Commits / c5adb9ea

Commit c5adb9ea, authored Sep 11, 2013 by Steven Cordwell

more complete tests

parent 063359b9 (8 changed files)
src/tests/test_FiniteHorizon.py (new file, mode 100644)

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:33:47 2013

@author: steve
"""
src/tests/test_LP.py (new file, mode 100644)

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:35:08 2013

@author: steve
"""
src/tests/test_PolicyIteration.py

@@ -9,65 +9,114 @@ import numpy as np
 import mdptoolbox
 
-from utils import SMALLNUM, P_forest, R_forest, P_small, R_small
+from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
+from utils import P_forest_sparse, R_forest_sparse
 
 def test_PolicyIteration_init_policy0():
-    a = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
-    p = np.matrix('1; 1')
-    assert (a.policy == p).all()
+    sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
+    p = np.array([1, 1])
+    assert (sdp.policy == p).all()
 
-def test_PolicyIteration_init_policy0_exampleForest():
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
-    p = np.matrix('0, 1, 0')
-    assert (a.policy == p).all()
+def test_PolicyIteration_init_policy0_forest():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
+    p = np.array([0, 1, 0])
+    assert (sdp.policy == p).all()
 
-def test_PolicyIteration_computePpolicyPRpolicy_exampleForest():
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
-    P1 = np.matrix('0.1 0.9 0; 1 0 0; 0.1 0 0.9')
-    R1 = np.matrix('0, 1, 4')
-    Ppolicy, Rpolicy = a._computePpolicyPRpolicy()
+def test_PolicyIteration_computePpolicyPRpolicy_forest():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
+    P1 = np.matrix('0.1, 0.9, 0; 1, 0, 0; 0.1, 0, 0.9').A
+    R1 = np.array([0, 1, 4])
+    Ppolicy, Rpolicy = sdp._computePpolicyPRpolicy()
     assert (np.absolute(Ppolicy - P1) < SMALLNUM).all()
     assert (np.absolute(Rpolicy - R1) < SMALLNUM).all()
 
-def test_PolicyIteration_evalPolicyIterative_exampleForest():
-    v0 = np.matrix('0, 0, 0')
-    v1 = np.matrix('4.47504640074458, 5.02753258879703, 23.17234211944304')
-    p = np.matrix('0, 1, 0')
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
-    assert (np.absolute(a.V - v0) < SMALLNUM).all()
-    a._evalPolicyIterative()
-    assert (np.absolute(a.V - v1) < SMALLNUM).all()
-    assert (a.policy == p).all()
+def test_PolicyIteration_evalPolicyIterative_forest():
+    v0 = np.array([0, 0, 0])
+    v1 = np.array([4.47504640074458, 5.02753258879703, 23.17234211944304])
+    p = np.array([0, 1, 0])
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
+    assert (np.absolute(sdp.V - v0) < SMALLNUM).all()
+    sdp._evalPolicyIterative()
+    assert (np.absolute(sdp.V - v1) < SMALLNUM).all()
+    assert (sdp.policy == p).all()
 
-def test_PolicyIteration_evalPolicyIterative_bellmanOperator_exampleForest():
-    v = np.matrix('4.47504640074458, 5.02753258879703, 23.17234211944304')
-    p = np.matrix('0, 0, 0')
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
-    a._evalPolicyIterative()
-    policy, value = a._bellmanOperator()
-    assert (policy == p).all()
-    assert (np.absolute(a.V - v) < SMALLNUM).all()
+def test_PolicyIteration_evalPolicyIterative_bellmanOperator_forest():
+    v = np.array([4.47504640074458, 5.02753258879703, 23.17234211944304])
+    p = np.array([0, 0, 0])
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
+    sdp._evalPolicyIterative()
+    policy, value = sdp._bellmanOperator()
+    assert (policy == p).all()
+    assert (np.absolute(sdp.V - v) < SMALLNUM).all()
 
-def test_PolicyIteration_iterative_exampleForest():
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9, eval_type=1)
-    v = np.matrix('26.2439058351861, 29.4839058351861, 33.4839058351861')
-    p = np.matrix('0 0 0')
-    itr = 2
-    assert (np.absolute(np.array(a.V) - v) < SMALLNUM).all()
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
+def test_PolicyIteration_iterative_forest():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9, eval_type=1)
+    sdp.run()
+    v = np.array([26.2439058351861, 29.4839058351861, 33.4839058351861])
+    p = (0, 0, 0)
+    itr = 2
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.policy == p
+    assert sdp.iter == itr
 
-def test_PolicyIteration_evalPolicyMatrix_exampleForest():
-    v_pol = np.matrix('4.47513812154696, 5.02762430939227, 23.17243384704857')
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
-    a._evalPolicyMatrix()
-    assert (np.absolute(a.V - v_pol) < SMALLNUM).all()
+def test_PolicyIteration_evalPolicyMatrix_forest():
+    v_pol = np.matrix([4.47513812154696, 5.02762430939227, 23.17243384704857])
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
+    sdp._evalPolicyMatrix()
+    assert (np.absolute(sdp.V - v_pol) < SMALLNUM).all()
 
-def test_PolicyIteration_matrix_exampleForest():
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
-    v = np.matrix('26.2440000000000, 29.4840000000000, 33.4840000000000')
-    p = np.matrix('0 0 0')
-    itr = 2
-    assert (np.absolute(np.array(a.V) - v) < SMALLNUM).all()
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
+def test_PolicyIteration_matrix_forest():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
+    sdp.run()
+    v = np.matrix([26.2440000000000, 29.4840000000000, 33.4840000000000])
+    p = (0, 0, 0)
+    itr = 2
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.policy == p
+    assert sdp.iter == itr
 
+def test_PolicyIteration_small():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
+    sdp.run()
+    v = np.array([42.4418604651163, 36.0465116279070])  # from Octave MDPtoolbox
+    p = (1, 0)  # from Octave MDPtoolbox
+    itr = 2  # from Octave MDPtoolbox
+    assert sdp.policy == p
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.iter == itr
+
+def test_PolicyIteration_small_sparse():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_sparse, R_small, 0.9)
+    sdp.run()
+    v = np.array([42.4418604651163, 36.0465116279070])  # from Octave MDPtoolbox
+    p = (1, 0)  # from Octave MDPtoolbox
+    itr = 2  # from Octave MDPtoolbox
+    assert sdp.policy == p
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.iter == itr
+
 def test_PolicyIterative_forest():
-    a = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.96)
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.96)
+    sdp.run()
     # v, p and itr from Octave MDPtoolbox
     v = np.array([74.6496000000000, 78.1056000000000, 82.1056000000000])
     p = (0, 0, 0)
     itr = 2
-    assert (np.absolute(np.array(a.V) - v) < SMALLNUM).all()
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
+    assert sdp.policy == p
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.iter == itr
 
+def test_PolicyIterative_forest_sparse():
+    sdp = mdptoolbox.mdp.PolicyIteration(P_forest_sparse, R_forest_sparse, 0.96)
+    sdp.run()
+    # v, p and itr from Octave MDPtoolbox
+    v = np.array([26.8301859311444, 28.0723241686974, 29.5099841658652,
+                  31.1739424959205, 33.0998201927438, 35.3288453048078,
+                  37.9087354808078, 40.8947194808078, 44.3507194808078,
+                  48.3507194808078])
+    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+    itr = 9
+    assert sdp.policy == p
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.iter == itr
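For orientation: the rewritten tests above all follow the same construct, run(), inspect pattern, with expected values cross-checked against the Octave MDPtoolbox. The sketch below shows that pattern in isolation. It is illustrative only: the P and R here are a plausible two-state, two-action stand-in for the P_small/R_small fixtures defined in src/tests/utils.py, which this diff does not show, so the real fixture values may differ.

    import numpy as np
    import mdptoolbox

    # Assumed stand-ins for utils.P_small / utils.R_small: P holds one
    # transition matrix per action (shape A x S x S), R one row per state.
    P = np.array([[[0.5, 0.5],
                   [0.8, 0.2]],
                  [[0.0, 1.0],
                   [0.1, 0.9]]])
    R = np.array([[5, 10],
                  [-1, 2]])

    sdp = mdptoolbox.mdp.PolicyIteration(P, R, 0.9)
    sdp.run()           # solving happens in run(), not in the constructor
    print(sdp.policy)   # tuple of optimal actions, one per state
    print(sdp.V)        # value of each state under that policy
    print(sdp.iter)     # number of policy-iteration steps taken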
src/tests/test_PolicyIterationModified.py (new file, mode 100644)

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:33:16 2013

@author: steve
"""
src/tests/test_QLearning.py

@@ -10,40 +10,48 @@ import numpy as np
 import mdptoolbox
 
 from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
-from utils import R_forest_sparse, P_small, R_small
+from utils import R_forest_sparse, P_small, R_small, P_sparse
 
 def test_QLearning_small():
     np.random.seed(0)
     sdp = mdptoolbox.mdp.QLearning(P_small, R_small, 0.9)
     sdp.run()
     q = np.matrix("33.330108655211646, 40.82109564847122; "
                   "34.37431040682546, 29.672368452303164")
     v = np.matrix("40.82109564847122, 34.37431040682546")
-    p = np.matrix("1 0")
+    p = (1, 0)
     assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
     assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
-    assert (np.array(sdp.policy) == p).all()
+    assert sdp.policy == p
 
+def test_QLearning_small_sparse():
+    np.random.seed(0)
+    sdp = mdptoolbox.mdp.QLearning(P_sparse, R_small, 0.9)
+    sdp.run()
+    q = np.matrix("33.330108655211646, 40.82109564847122; "
+                  "34.37431040682546, 29.672368452303164")
+    v = np.matrix("40.82109564847122, 34.37431040682546")
+    p = (1, 0)
+    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert sdp.policy == p
+
 def test_QLearning_forest():
     np.random.seed(0)
     sdp = mdptoolbox.mdp.QLearning(P_forest, R_forest, 0.96)
     sdp.run()
     q = np.matrix("11.198908998901134, 10.34652034142302; "
                   "10.74229967143465, 11.741057920409865; "
                   "2.8698000059458546, 12.259732864170232")
     v = np.matrix("11.198908998901134, 11.741057920409865, 12.259732864170232")
-    p = np.matrix("0 1 1")
+    p = (0, 1, 1)
     assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
     assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
-    assert (np.array(sdp.policy) == p).all()
+    assert sdp.policy == p
 
 #FIXME: This is wrong as the number of states in this is util.STATES, not 3
 def test_QLearning_forest_sparse():
     np.random.seed(0)
     sdp = mdptoolbox.mdp.QLearning(P_forest_sparse, R_forest_sparse, 0.96)
-    q = np.matrix("11.198908998901134, 10.34652034142302; "
-                  "10.74229967143465, 11.741057920409865; "
-                  "2.8698000059458546, 12.259732864170232")
-    v = np.matrix("11.198908998901134, 11.741057920409865, 12.259732864170232")
-    p = np.matrix("0 1 1")
-    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
-    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
-    assert (np.array(sdp.policy) == p).all()
+    sdp.run()
+    p = (0, 1, 1, 1, 1, 1, 0, 0, 0, 0)
+    assert sdp.policy == p
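Note how every QLearning test above seeds the global NumPy RNG before constructing the solver: Q-learning samples transitions, so its Q, V and policy are only reproducible for a fixed seed. Below is a minimal sketch of that reproducibility contract, reusing the same assumed stand-in P and R as the earlier sketch (not the real utils fixtures):

    import numpy as np
    import mdptoolbox

    P = np.array([[[0.5, 0.5], [0.8, 0.2]],
                  [[0.0, 1.0], [0.1, 0.9]]])  # assumed stand-in fixtures
    R = np.array([[5, 10], [-1, 2]])

    np.random.seed(0)   # fix the RNG so the learned Q-table is deterministic
    a = mdptoolbox.mdp.QLearning(P, R, 0.9)
    a.run()
    np.random.seed(0)   # same seed, so the second run must match exactly
    b = mdptoolbox.mdp.QLearning(P, R, 0.9)
    b.run()
    assert (a.Q == b.Q).all() and a.policy == b.policy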
src/tests/test_RelativeValueIteration.py

@@ -9,33 +9,38 @@ import numpy as np
 import mdptoolbox
 
-from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
+from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
+from utils import R_forest_sparse, P_small, R_small, P_sparse
 
-def test_RelativeValueIteration_dense():
-    a = mdptoolbox.mdp.RelativeValueIteration(P_small, R_small)
-    p = np.matrix('1 0')
-    ar = 3.88523524641183
-    itr = 29
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
-    assert np.absolute(a.average_reward - ar) < SMALLNUM
+def test_RelativeValueIteration_small():
+    sdp = mdptoolbox.mdp.RelativeValueIteration(P_small, R_small)
+    sdp.run()
+    p = np.matrix('1 0')
+    ar = 3.88523524641183  # from Octave MDPtoolbox
+    assert (np.array(sdp.policy) == p).all()
+    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
 
-def test_RelativeValueIteration_sparse():
-    a = mdptoolbox.mdp.RelativeValueIteration(P_sparse, R_small)
-    p = np.matrix('1 0')
-    ar = 3.88523524641183
-    itr = 29
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
-    assert np.absolute(a.average_reward - ar) < SMALLNUM
+def test_RelativeValueIteration_small_sparse():
+    sdp = mdptoolbox.mdp.RelativeValueIteration(P_sparse, R_small)
+    sdp.run()
+    p = np.matrix('1 0')
+    ar = 3.88523524641183  # from Octave MDPtoolbox
+    assert (np.array(sdp.policy) == p).all()
+    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
 
-def test_RelativeValueIteration_exampleForest():
-    a = mdptoolbox.mdp.RelativeValueIteration(P_forest, R_forest)
-    itr = 4
-    p = np.matrix('0 0 0')
-    #v = np.matrix('-4.360000000000000 -0.760000000000000 3.240000000000000')
-    ar = 2.43000000000000
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
-    #assert (np.absolute(np.array(a.V) - v) < SMALLNUM).all()
-    assert np.absolute(a.average_reward - ar) < SMALLNUM
+def test_RelativeValueIteration_forest():
+    sdp = mdptoolbox.mdp.RelativeValueIteration(P_forest, R_forest)
+    sdp.run()
+    p = np.matrix('0 0 0')
+    ar = 3.24000000000000  # from Octave MDPtoolbox
+    assert (np.array(sdp.policy) == p).all()
+    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
 
+def test_RelativeValueIteration_forest_sparse():
+    sdp = mdptoolbox.mdp.RelativeValueIteration(P_forest_sparse, R_forest_sparse)
+    sdp.run()
+    p = np.matrix('0 0 0 0 0 0 0 0 0 0')
+    ar = 1.54968195600000  # from Octave MDPtoolbox
+    assert (np.array(sdp.policy) == p).all()
+    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
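Unlike the discounted solvers, RelativeValueIteration takes no discount argument: it optimises the long-run average reward, which is why these tests compare sdp.average_reward against an ar value instead of a discounted V. A sketch of that difference, again with the assumed stand-in P and R rather than the real fixtures:

    import numpy as np
    import mdptoolbox

    P = np.array([[[0.5, 0.5], [0.8, 0.2]],
                  [[0.0, 1.0], [0.1, 0.9]]])  # assumed stand-in fixtures
    R = np.array([[5, 10], [-1, 2]])

    sdp = mdptoolbox.mdp.RelativeValueIteration(P, R)  # note: no discount
    sdp.run()
    print(sdp.policy)          # optimal policy, e.g. (1, 0) in the tests above
    print(sdp.average_reward)  # gain of that policy, compared against ar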
src/tests/test_ValueIteration.py

@@ -9,32 +9,52 @@ import numpy as np
 import mdptoolbox
 
-from utils import SMALLNUM, STATES, P_forest, R_forest, P_forest_sparse
+from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
 from utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse
-from utils import P_small, R_small
+from utils import P_small, R_small, P_sparse
 
 def test_ValueIteration_small():
-    sdp = mdptoolbox.mdp.ValueIteration(P_small, R_small, 0.9, 0.01)
+    sdp = mdptoolbox.mdp.ValueIteration(P_small, R_small, 0.9)
+    sdp.run()
     v = np.array((40.048625392716822, 33.65371175967546))
+    p = (1, 0)
+    itr = 26
     assert (sdp.max_iter == 28)
     assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
-    assert (sdp.policy == (1, 0))
-    assert (sdp.iter == 26)
+    assert (sdp.policy == p)
+    assert (sdp.iter == itr)
 
+def test_ValueIteration_small_sparse():
+    sdp = mdptoolbox.mdp.ValueIteration(P_sparse, R_small, 0.9)
+    sdp.run()
+    v = np.array((40.048625392716822, 33.65371175967546))
+    p = (1, 0)
+    itr = 26
+    assert (sdp.max_iter == 28)
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+    assert (sdp.policy == p)
+    assert (sdp.iter == itr)
+
 def test_ValueIteration_forest():
     sdp = mdptoolbox.mdp.ValueIteration(P_forest, R_forest, 0.96)
-    assert (np.array(sdp.policy) == np.array([0, 0, 0])).all()
+    sdp.run()
+    p = (0, 0, 0)
+    assert sdp.policy == p
+    assert sdp.iter == 4
 
 def test_ValueIteration_forest_sparse():
     sdp = mdptoolbox.mdp.ValueIteration(P_forest_sparse, R_forest_sparse, 0.96)
-    assert (np.array(sdp.policy) == np.array([0] * STATES)).all()
+    sdp.run()
+    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+    assert sdp.policy == p
+    assert sdp.iter == 14
 
 def test_ValueIteration_rand():
     sdp = mdptoolbox.mdp.ValueIteration(P_rand, R_rand, 0.9)
     sdp.run()
     assert sdp.policy
 
 def test_ValueIteration_rand_sparse():
     sdp = mdptoolbox.mdp.ValueIteration(P_rand_sparse, R_rand_sparse, 0.9)
     sdp.run()
     assert sdp.policy
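The assert (sdp.max_iter == 28) checks deserve a note: ValueIteration appears to compute an upper bound on the number of iterations from the discount factor and the stopping threshold epsilon before solving starts, and the old explicit 0.01 argument was presumably dropped because it matches the default. A sketch of that bound-then-run behaviour, with the same assumed stand-in P and R:

    import numpy as np
    import mdptoolbox

    P = np.array([[[0.5, 0.5], [0.8, 0.2]],
                  [[0.0, 1.0], [0.1, 0.9]]])  # assumed stand-in fixtures
    R = np.array([[5, 10], [-1, 2]])

    sdp = mdptoolbox.mdp.ValueIteration(P, R, 0.9)  # epsilon assumed 0.01 by default
    print(sdp.max_iter)  # iteration bound, available before run()
    sdp.run()
    print(sdp.iter)      # iterations actually performed, at most max_iter
    print(sdp.policy, sdp.V)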
src/tests/test_ValueIterationGS.py

@@ -9,18 +9,45 @@ import numpy as np
 import mdptoolbox
 
-from utils import SMALLNUM, P_forest, R_forest
+from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
+from utils import P_forest_sparse, R_forest_sparse
 
-def test_ValueIterationGS_boundIter_exampleForest():
-    a = mdptoolbox.mdp.ValueIterationGS(P_forest, R_forest, 0.9)
-    itr = 39
-    assert (a.max_iter == itr)
-
-def test_ValueIterationGS_exampleForest():
-    a = mdptoolbox.mdp.ValueIterationGS(P_forest, R_forest, 0.9)
-    p = np.matrix('0 0 0')
-    v = np.matrix('25.5833879767579 28.8306546355469 32.8306546355469')
-    itr = 33
-    assert (np.array(a.policy) == p).all()
-    assert a.iter == itr
-    assert (np.absolute(np.array(a.V) - v) < SMALLNUM).all()
+def test_ValueIterationGS_small():
+    sdp = mdptoolbox.mdp.ValueIterationGS(P_small, R_small, 0.9)
+    sdp.run()
+    p = (1, 0)
+    itr = 28  # from Octave MDPtoolbox
+    v = np.matrix('42.27744026138212, 35.89524504047155')
+    assert sdp.iter == itr
+    assert sdp.policy == p
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+
+def test_ValueIterationGS_small_sparse():
+    sdp = mdptoolbox.mdp.ValueIterationGS(P_sparse, R_small, 0.9)
+    sdp.run()
+    p = (1, 0)
+    itr = 28  # from Octave MDPtoolbox
+    v = np.matrix('42.27744026138212, 35.89524504047155')
+    assert sdp.iter == itr
+    assert sdp.policy == p
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+
+def test_ValueIterationGS_forest():
+    sdp = mdptoolbox.mdp.ValueIterationGS(P_forest, R_forest, 0.96)
+    sdp.run()
+    p = (0, 0, 0)
+    v = np.matrix('69.98910821400665, 73.46560194552877, 77.46560194552877')
+    itr = 63  # from Octave MDPtoolbox
+    assert sdp.max_iter == 63
+    assert sdp.policy == p
+    assert sdp.iter == itr
+    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
+
+def test_ValueIterationGS_forest_sparse():
+    sdp = mdptoolbox.mdp.ValueIterationGS(P_forest_sparse, R_forest_sparse, 0.96)
+    sdp.run()
+    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+    itr = 16  # from Octave MDPtoolbox
+    assert sdp.policy == p
+    assert sdp.iter == itr
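ValueIterationGS is the Gauss-Seidel variant: it updates each state's value in place during a sweep, so it typically takes a different number of iterations and converges to slightly different V estimates than plain value iteration, while agreeing on the policy. That matches the tests above: itr = 28 here versus itr = 26 in test_ValueIteration_small, with the same policy (1, 0). A sketch of that comparison, with the same assumed stand-in P and R as before:

    import numpy as np
    import mdptoolbox

    P = np.array([[[0.5, 0.5], [0.8, 0.2]],
                  [[0.0, 1.0], [0.1, 0.9]]])  # assumed stand-in fixtures
    R = np.array([[5, 10], [-1, 2]])

    gs = mdptoolbox.mdp.ValueIterationGS(P, R, 0.9)
    gs.run()
    vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9)
    vi.run()
    assert gs.policy == vi.policy  # the same greedy policy is expected...
    print(gs.V, vi.V)              # ...but the value estimates differ slightly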