Commit c5adb9ea authored by Steven Cordwell

more complete tests

parent 063359b9
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:33:47 2013
@author: steve
"""
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:35:08 2013
@author: steve
"""
@@ -9,65 +9,114 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
from utils import P_forest_sparse, R_forest_sparse

def test_PolicyIteration_init_policy0():
    sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
    p = np.array([1, 1])
    assert (sdp.policy == p).all()

def test_PolicyIteration_init_policy0_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    p = np.array([0, 1, 0])
    assert (sdp.policy == p).all()

def test_PolicyIteration_computePpolicyPRpolicy_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    P1 = np.matrix('0.1, 0.9, 0; 1, 0, 0; 0.1, 0, 0.9').A
    R1 = np.array([0, 1, 4])
    Ppolicy, Rpolicy = sdp._computePpolicyPRpolicy()
    assert (np.absolute(Ppolicy - P1) < SMALLNUM).all()
    assert (np.absolute(Rpolicy - R1) < SMALLNUM).all()

def test_PolicyIteration_evalPolicyIterative_forest():
    v0 = np.array([0, 0, 0])
    v1 = np.array([4.47504640074458, 5.02753258879703, 23.17234211944304])
    p = np.array([0, 1, 0])
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    assert (np.absolute(sdp.V - v0) < SMALLNUM).all()
    sdp._evalPolicyIterative()
    assert (np.absolute(sdp.V - v1) < SMALLNUM).all()
    assert (sdp.policy == p).all()

def test_PolicyIteration_evalPolicyIterative_bellmanOperator_forest():
    v = np.array([4.47504640074458, 5.02753258879703, 23.17234211944304])
    p = np.array([0, 0, 0])
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    sdp._evalPolicyIterative()
    policy, value = sdp._bellmanOperator()
    assert (policy == p).all()
    assert (np.absolute(sdp.V - v) < SMALLNUM).all()

def test_PolicyIteration_iterative_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9, eval_type=1)
    sdp.run()
    v = np.array([26.2439058351861, 29.4839058351861, 33.4839058351861])
    p = (0, 0, 0)
    itr = 2
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
    assert sdp.iter == itr

def test_PolicyIteration_evalPolicyMatrix_forest():
    v_pol = np.matrix([4.47513812154696, 5.02762430939227, 23.17243384704857])
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    sdp._evalPolicyMatrix()
    assert (np.absolute(sdp.V - v_pol) < SMALLNUM).all()

def test_PolicyIteration_matrix_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    sdp.run()
    v = np.matrix([26.2440000000000, 29.4840000000000, 33.4840000000000])
    p = (0, 0, 0)
    itr = 2
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
    assert sdp.iter == itr

def test_PolicyIteration_small():
    sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
    sdp.run()
    v = np.array([42.4418604651163, 36.0465116279070])  # from Octave MDPtoolbox
    p = (1, 0)  # from Octave MDPtoolbox
    itr = 2  # from Octave MDPtoolbox
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr

def test_PolicyIteration_small_sparse():
    sdp = mdptoolbox.mdp.PolicyIteration(P_sparse, R_small, 0.9)
    sdp.run()
    v = np.array([42.4418604651163, 36.0465116279070])  # from Octave MDPtoolbox
    p = (1, 0)  # from Octave MDPtoolbox
    itr = 2  # from Octave MDPtoolbox
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr

def test_PolicyIteration_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.96)
    sdp.run()
    # v, p and itr from Octave MDPtoolbox
    v = np.array([74.6496000000000, 78.1056000000000, 82.1056000000000])
    p = (0, 0, 0)
    itr = 2
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr

def test_PolicyIteration_forest_sparse():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest_sparse, R_forest_sparse,
                                         0.96)
    sdp.run()
    # v, p and itr from Octave MDPtoolbox
    v = np.array([26.8301859311444, 28.0723241686974, 29.5099841658652,
                  31.1739424959205, 33.0998201927438, 35.3288453048078,
                  37.9087354808078, 40.8947194808078, 44.3507194808078,
                  48.3507194808078])
    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    itr = 9
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr
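For reference, the pattern these tests exercise can be run standalone. The two-state P_small/R_small fixtures live in the tests' utils module; the values below are an assumption based on the classic two-state example from the MDP toolbox documentation, not a copy of that file.

import numpy as np
import mdptoolbox

# Assumed two-state fixture: P[action, state, next_state], R[state, action].
P = np.array([[[0.5, 0.5], [0.8, 0.2]],
              [[0.0, 1.0], [0.1, 0.9]]])
R = np.array([[5, 10], [-1, 2]])

sdp = mdptoolbox.mdp.PolicyIteration(P, R, 0.9)
sdp.run()
print(sdp.policy, sdp.V, sdp.iter)  # should match the (1, 0) policy asserted above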
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:33:16 2013
@author: steve
"""
@@ -10,40 +10,48 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
from utils import R_forest_sparse, P_small, R_small, P_sparse

def test_QLearning_small():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_small, R_small, 0.9)
    sdp.run()
    q = np.matrix("33.330108655211646, 40.82109564847122; "
                  "34.37431040682546, 29.672368452303164")
    v = np.matrix("40.82109564847122, 34.37431040682546")
    p = (1, 0)
    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p

def test_QLearning_small_sparse():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_sparse, R_small, 0.9)
    sdp.run()
    q = np.matrix("33.330108655211646, 40.82109564847122; "
                  "34.37431040682546, 29.672368452303164")
    v = np.matrix("40.82109564847122, 34.37431040682546")
    p = (1, 0)
    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p

def test_QLearning_forest():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_forest, R_forest, 0.96)
    sdp.run()
    q = np.matrix("11.198908998901134, 10.34652034142302; "
                  "10.74229967143465, 11.741057920409865; "
                  "2.8698000059458546, 12.259732864170232")
    v = np.matrix("11.198908998901134, 11.741057920409865, 12.259732864170232")
    p = (0, 1, 1)
    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p

def test_QLearning_forest_sparse():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_forest_sparse, R_forest_sparse, 0.96)
    sdp.run()
    p = (0, 1, 1, 1, 1, 1, 0, 0, 0, 0)
    assert sdp.policy == p
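Q-learning samples transitions stochastically, which is why every test above seeds NumPy's generator before constructing the solver and can then assert exact Q, V and policy values. A minimal sketch of that reproducibility pattern, reusing the assumed two-state fixture from the sketch above:

import numpy as np
import mdptoolbox

# Assumed two-state fixture (see the PolicyIteration sketch above).
P = np.array([[[0.5, 0.5], [0.8, 0.2]],
              [[0.0, 1.0], [0.1, 0.9]]])
R = np.array([[5, 10], [-1, 2]])

np.random.seed(0)                  # fix the sampled trajectory
first = mdptoolbox.mdp.QLearning(P, R, 0.9)
first.run()

np.random.seed(0)                  # same seed, same trajectory
second = mdptoolbox.mdp.QLearning(P, R, 0.9)
second.run()

assert (np.array(first.Q) == np.array(second.Q)).all()  # identical Q-tables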
@@ -9,33 +9,38 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
from utils import R_forest_sparse, P_small, R_small, P_sparse

def test_RelativeValueIteration_small():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_small, R_small)
    sdp.run()
    p = np.matrix('1 0')
    ar = 3.88523524641183  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM

def test_RelativeValueIteration_small_sparse():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_sparse, R_small)
    sdp.run()
    p = np.matrix('1 0')
    ar = 3.88523524641183  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM

def test_RelativeValueIteration_forest():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_forest, R_forest)
    sdp.run()
    p = np.matrix('0 0 0')
    ar = 3.24000000000000  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM

def test_RelativeValueIteration_forest_sparse():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_forest_sparse,
                                                R_forest_sparse)
    sdp.run()
    p = np.matrix('0 0 0 0 0 0 0 0 0 0')
    ar = 1.54968195600000  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
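RelativeValueIteration targets the undiscounted average-reward criterion, which is why these tests pass no discount factor and check average_reward (the gain) rather than a discounted value function. A sketch on a three-state forest model; the P and R below are reconstructed from the P1/R1 expectations in the _computePpolicyPRpolicy test above, so treat them as an assumption about the utils fixtures:

import numpy as np
import mdptoolbox

# Action 0 ("wait"): the forest grows a stage, or burns down with prob 0.1.
# Action 1 ("cut"): harvest and return to the youngest stage.
P = np.array([[[0.1, 0.9, 0.0],
               [0.1, 0.0, 0.9],
               [0.1, 0.0, 0.9]],
              [[1.0, 0.0, 0.0],
               [1.0, 0.0, 0.0],
               [1.0, 0.0, 0.0]]])
R = np.array([[0, 0], [0, 1], [4, 2]])  # R[state, action]

sdp = mdptoolbox.mdp.RelativeValueIteration(P, R)
sdp.run()
print(sdp.policy, sdp.average_reward)  # expected (0, 0, 0) and about 3.24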
@@ -9,32 +9,52 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
from utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse
from utils import P_small, R_small, P_sparse

def test_ValueIteration_small():
    sdp = mdptoolbox.mdp.ValueIteration(P_small, R_small, 0.9)
    sdp.run()
    v = np.array((40.048625392716822, 33.65371175967546))
    p = (1, 0)
    itr = 26
    assert (sdp.max_iter == 28)
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert (sdp.policy == p)
    assert (sdp.iter == itr)

def test_ValueIteration_small_sparse():
    sdp = mdptoolbox.mdp.ValueIteration(P_sparse, R_small, 0.9)
    sdp.run()
    v = np.array((40.048625392716822, 33.65371175967546))
    p = (1, 0)
    itr = 26
    assert (sdp.max_iter == 28)
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert (sdp.policy == p)
    assert (sdp.iter == itr)

def test_ValueIteration_forest():
    sdp = mdptoolbox.mdp.ValueIteration(P_forest, R_forest, 0.96)
    sdp.run()
    p = (0, 0, 0)
    assert sdp.policy == p
    assert sdp.iter == 4

def test_ValueIteration_forest_sparse():
    sdp = mdptoolbox.mdp.ValueIteration(P_forest_sparse, R_forest_sparse, 0.96)
    sdp.run()
    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    assert sdp.policy == p
    assert sdp.iter == 14

def test_ValueIteration_rand():
    sdp = mdptoolbox.mdp.ValueIteration(P_rand, R_rand, 0.9)
    sdp.run()
    assert sdp.policy

def test_ValueIteration_rand_sparse():
    sdp = mdptoolbox.mdp.ValueIteration(P_rand_sparse, R_rand_sparse, 0.9)
    sdp.run()
    assert sdp.policy
@@ -9,18 +9,45 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
from utils import P_forest_sparse, R_forest_sparse

def test_ValueIterationGS_small():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_small, R_small, 0.9)
    sdp.run()
    p = (1, 0)
    itr = 28  # from Octave MDPtoolbox
    v = np.matrix('42.27744026138212, 35.89524504047155')
    assert sdp.iter == itr
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()

def test_ValueIterationGS_small_sparse():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_sparse, R_small, 0.9)
    sdp.run()
    p = (1, 0)
    itr = 28  # from Octave MDPtoolbox
    v = np.matrix('42.27744026138212, 35.89524504047155')
    assert sdp.iter == itr
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()

def test_ValueIterationGS_forest():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_forest, R_forest, 0.96)
    sdp.run()
    p = (0, 0, 0)
    v = np.matrix('69.98910821400665, 73.46560194552877, 77.46560194552877')
    itr = 63  # from Octave MDPtoolbox
    assert sdp.max_iter == 63
    assert sdp.policy == p
    assert sdp.iter == itr
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()

def test_ValueIterationGS_forest_sparse():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_forest_sparse, R_forest_sparse,
                                          0.96)
    sdp.run()
    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    itr = 16  # from Octave MDPtoolbox
    assert sdp.policy == p
    assert sdp.iter == itr
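ValueIterationGS is the Gauss-Seidel variant of value iteration: during a sweep it immediately reuses state values already updated in that same sweep instead of waiting for the next iteration. A minimal sketch on the reconstructed forest fixture (same assumption as in the RelativeValueIteration sketch above):

import numpy as np
import mdptoolbox

P = np.array([[[0.1, 0.9, 0.0], [0.1, 0.0, 0.9], [0.1, 0.0, 0.9]],
              [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]]])
R = np.array([[0, 0], [0, 1], [4, 2]])

sdp = mdptoolbox.mdp.ValueIterationGS(P, R, 0.96)
sdp.run()
print(sdp.policy, sdp.V)  # expected (0, 0, 0), with values near those above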