Commit c5adb9ea authored by Steven Cordwell

more complete tests

parent 063359b9
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:33:47 2013
@author: steve
"""
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:35:08 2013
@author: steve
"""
@@ -9,65 +9,114 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
from utils import P_forest_sparse, R_forest_sparse
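These tests rely on shared fixtures from the utils module. A minimal sketch of what those fixtures are assumed to provide (the repository's actual utils module may differ):

import numpy as np
import scipy.sparse as sp
import mdptoolbox.example

SMALLNUM = 10 ** -12  # assumed floating-point comparison tolerance
STATES = 10           # assumed size of the sparse forest example

# The two-state example used in the mdptoolbox documentation.
P_small = np.array([[[0.5, 0.5], [0.8, 0.2]], [[0.0, 1.0], [0.1, 0.9]]])
R_small = np.array([[5, 10], [-1, 2]])

# Sparse variant: one CSR matrix per action.
P_sparse = np.empty(2, dtype=object)
P_sparse[0] = sp.csr_matrix(P_small[0])
P_sparse[1] = sp.csr_matrix(P_small[1])

# Forest-management examples generated by the toolbox itself.
P_forest, R_forest = mdptoolbox.example.forest()
P_forest_sparse, R_forest_sparse = mdptoolbox.example.forest(S=STATES,
                                                             is_sparse=True)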
def test_PolicyIteration_init_policy0():
    sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
    p = np.array([1, 1])
    assert (sdp.policy == p).all()

def test_PolicyIteration_init_policy0_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    p = np.array([0, 1, 0])
    assert (sdp.policy == p).all()

def test_PolicyIteration_computePpolicyPRpolicy_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    P1 = np.matrix('0.1, 0.9, 0; 1, 0, 0; 0.1, 0, 0.9').A
    R1 = np.array([0, 1, 4])
    Ppolicy, Rpolicy = sdp._computePpolicyPRpolicy()
    assert (np.absolute(Ppolicy - P1) < SMALLNUM).all()
    assert (np.absolute(Rpolicy - R1) < SMALLNUM).all()
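_computePpolicyPRpolicy collapses the per-action matrices into the single transition matrix and reward vector induced by the current policy. An illustrative equivalent, assuming dense P of shape (A, S, S) and R of shape (S, A):

def compute_policy_PR(P, R, policy):
    # Row s of Ppolicy is row s of P[a] for the action a chosen in state s.
    S = P.shape[1]
    Ppolicy = np.array([P[policy[s], s, :] for s in range(S)])
    Rpolicy = np.array([R[s, policy[s]] for s in range(S)])
    return Ppolicy, Rpolicy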
def test_PolicyIteration_evalPolicyIterative_forest():
    v0 = np.array([0, 0, 0])
    v1 = np.array([4.47504640074458, 5.02753258879703, 23.17234211944304])
    p = np.array([0, 1, 0])
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    assert (np.absolute(sdp.V - v0) < SMALLNUM).all()
    sdp._evalPolicyIterative()
    assert (np.absolute(sdp.V - v1) < SMALLNUM).all()
    assert (sdp.policy == p).all()

def test_PolicyIteration_evalPolicyIterative_bellmanOperator_forest():
    v = np.array([4.47504640074458, 5.02753258879703, 23.17234211944304])
    p = np.array([0, 0, 0])
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    sdp._evalPolicyIterative()
    policy, value = sdp._bellmanOperator()
    assert (policy == p).all()
    assert (np.absolute(sdp.V - v) < SMALLNUM).all()
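_bellmanOperator performs one greedy backup over the current value estimate. A rough sketch of that step, again assuming dense P (A, S, S) and R (S, A):

def bellman_backup(P, R, V, discount):
    # Q[a, s] = R[s, a] + discount * sum_s' P[a, s, s'] * V[s']
    A = P.shape[0]
    Q = np.array([R[:, a] + discount * P[a].dot(V) for a in range(A)])
    return Q.argmax(axis=0), Q.max(axis=0)  # greedy policy, backed-up values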
def test_PolicyIteration_iterative_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9, eval_type=1)
    sdp.run()
    v = np.array([26.2439058351861, 29.4839058351861, 33.4839058351861])
    p = (0, 0, 0)
    itr = 2
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
    assert sdp.iter == itr
def test_PolicyIteration_evalPolicyMatrix_forest():
    v_pol = np.matrix([4.47513812154696, 5.02762430939227, 23.17243384704857])
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    sdp._evalPolicyMatrix()
    assert (np.absolute(sdp.V - v_pol) < SMALLNUM).all()

def test_PolicyIteration_matrix_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.9)
    sdp.run()
    v = np.matrix([26.2440000000000, 29.4840000000000, 33.4840000000000])
    p = (0, 0, 0)
    itr = 2
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
    assert sdp.iter == itr
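eval_type=1 above selects iterative policy evaluation; the default matrix evaluation tested here instead solves the policy-evaluation equations exactly, which is why its regression values differ slightly from the iterative ones. A sketch, reusing compute_policy_PR from the earlier sketch:

def eval_policy_matrix(P, R, policy, discount):
    # Exact solve of V = Rpolicy + discount * Ppolicy V.
    Ppolicy, Rpolicy = compute_policy_PR(P, R, policy)
    S = Ppolicy.shape[0]
    return np.linalg.solve(np.eye(S) - discount * Ppolicy, Rpolicy)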
def test_PolicyIteration_small():
    sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
    sdp.run()
    v = np.array([42.4418604651163, 36.0465116279070])  # from Octave MDPtoolbox
    p = (1, 0)  # from Octave MDPtoolbox
    itr = 2  # from Octave MDPtoolbox
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr
def test_PolicyIteration_small_sparse():
    sdp = mdptoolbox.mdp.PolicyIteration(P_sparse, R_small, 0.9)
    sdp.run()
    v = np.array([42.4418604651163, 36.0465116279070])  # from Octave MDPtoolbox
    p = (1, 0)  # from Octave MDPtoolbox
    itr = 2  # from Octave MDPtoolbox
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr
def test_PolicyIteration_forest():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest, R_forest, 0.96)
    sdp.run()
    # v, p and itr from Octave MDPtoolbox
    v = np.array([74.6496000000000, 78.1056000000000, 82.1056000000000])
    p = (0, 0, 0)
    itr = 2
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr
def test_PolicyIteration_forest_sparse():
    sdp = mdptoolbox.mdp.PolicyIteration(P_forest_sparse, R_forest_sparse,
                                         0.96)
    sdp.run()
    # v, p and itr from Octave MDPtoolbox
    v = np.array([26.8301859311444, 28.0723241686974, 29.5099841658652,
                  31.1739424959205, 33.0998201927438, 35.3288453048078,
                  37.9087354808078, 40.8947194808078, 44.3507194808078,
                  48.3507194808078])
    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    itr = 9
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.iter == itr
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 13:33:16 2013
@author: steve
"""
@@ -10,40 +10,48 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
from utils import R_forest_sparse, P_small, R_small, P_sparse
def test_QLearning_small():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_small, R_small, 0.9)
    sdp.run()
    q = np.matrix("33.330108655211646, 40.82109564847122; "
                  "34.37431040682546, 29.672368452303164")
    v = np.matrix("40.82109564847122, 34.37431040682546")
    p = (1, 0)
    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
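np.random.seed(0) pins the simulated transitions, so the Q, V and policy values act as regression numbers. They come from the standard tabular update; a condensed sketch (the toolbox's own learning-rate schedule and episode handling may differ):

def q_learning_update(Q, s, a, r, s_next, alpha, discount):
    # One temporal-difference step toward r + discount * max_a' Q[s', a'].
    td_error = r + discount * Q[s_next].max() - Q[s, a]
    Q[s, a] += alpha * td_error
    return Q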
def test_QLearning_small_sparse():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_sparse, R_small, 0.9)
    sdp.run()
    q = np.matrix("33.330108655211646, 40.82109564847122; "
                  "34.37431040682546, 29.672368452303164")
    v = np.matrix("40.82109564847122, 34.37431040682546")
    p = (1, 0)
    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
def test_QLearning_forest():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_forest, R_forest, 0.96)
    sdp.run()
    q = np.matrix("11.198908998901134, 10.34652034142302; "
                  "10.74229967143465, 11.741057920409865; "
                  "2.8698000059458546, 12.259732864170232")
    v = np.matrix("11.198908998901134, 11.741057920409865, 12.259732864170232")
    p = (0, 1, 1)
    assert (np.absolute(sdp.Q - q) < SMALLNUM).all()
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert sdp.policy == p
# FIXME: this is wrong, as the number of states in this is utils.STATES, not 3
def test_QLearning_forest_sparse():
    np.random.seed(0)
    sdp = mdptoolbox.mdp.QLearning(P_forest_sparse, R_forest_sparse, 0.96)
    sdp.run()
    p = (0, 1, 1, 1, 1, 1, 0, 0, 0, 0)
    assert sdp.policy == p
@@ -9,33 +9,38 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
from utils import R_forest_sparse, P_small, R_small, P_sparse
def test_RelativeValueIteration_small():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_small, R_small)
    sdp.run()
    p = np.matrix('1 0')
    ar = 3.88523524641183  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_small_sparse():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_sparse, R_small)
    sdp.run()
    p = np.matrix('1 0')
    ar = 3.88523524641183  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_forest():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_forest, R_forest)
    sdp.run()
    p = np.matrix('0 0 0')
    ar = 3.24000000000000  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_forest_sparse():
    sdp = mdptoolbox.mdp.RelativeValueIteration(P_forest_sparse,
                                                R_forest_sparse)
    sdp.run()
    p = np.matrix('0 0 0 0 0 0 0 0 0 0')
    ar = 1.54968195600000  # from Octave MDPtoolbox
    assert (np.array(sdp.policy) == p).all()
    assert np.absolute(sdp.average_reward - ar) < SMALLNUM
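RelativeValueIteration optimizes the undiscounted average-reward criterion, which is why these tests check average_reward rather than discounted values. A sketch of one update in the usual formulation (the toolbox's exact stopping rule and choice of reference state are assumptions here):

def rvi_step(P, R, h):
    # One backup of the relative values h; g estimates the average reward.
    A = P.shape[0]
    U = np.array([R[:, a] + P[a].dot(h) for a in range(A)]).max(axis=0)
    g = U[-1]  # gain read off at an (assumed) reference state
    return U - g, g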
@@ -9,32 +9,52 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
from utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse
from utils import P_small, R_small, P_sparse
def test_ValueIteration_small():
    sdp = mdptoolbox.mdp.ValueIteration(P_small, R_small, 0.9)
    sdp.run()
    v = np.array((40.048625392716822, 33.65371175967546))
    p = (1, 0)
    itr = 26
    assert (sdp.max_iter == 28)
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert (sdp.policy == p)
    assert (sdp.iter == itr)
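The max_iter == 28 assertion covers the iteration bound that ValueIteration appears to precompute before running (iter == 26 is where it actually converges). A hedged sketch of the classic geometric-contraction bound such a precomputation can be based on:

import math

def bound_iterations(span_v1_v0, discount, epsilon):
    # Smallest k with discount**k * span_v1_v0 <= epsilon * (1 - discount) / discount.
    return int(math.ceil(math.log(epsilon * (1 - discount)
                                  / (discount * span_v1_v0))
                         / math.log(discount)))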
def test_ValueIteration_small_sparse():
    sdp = mdptoolbox.mdp.ValueIteration(P_sparse, R_small, 0.9)
    sdp.run()
    v = np.array((40.048625392716822, 33.65371175967546))
    p = (1, 0)
    itr = 26
    assert (sdp.max_iter == 28)
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
    assert (sdp.policy == p)
    assert (sdp.iter == itr)
def test_ValueIteration_forest():
    sdp = mdptoolbox.mdp.ValueIteration(P_forest, R_forest, 0.96)
    sdp.run()
    p = (0, 0, 0)
    assert sdp.policy == p
    assert sdp.iter == 4
def test_ValueIteration_forest_sparse():
    sdp = mdptoolbox.mdp.ValueIteration(P_forest_sparse, R_forest_sparse, 0.96)
    sdp.run()
    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    assert sdp.policy == p
    assert sdp.iter == 14
def test_ValueIteration_rand():
    sdp = mdptoolbox.mdp.ValueIteration(P_rand, R_rand, 0.9)
    sdp.run()
    assert sdp.policy  # smoke test: any non-empty policy tuple passes
def test_ValueIteration_rand_sparse():
    sdp = mdptoolbox.mdp.ValueIteration(P_rand_sparse, R_rand_sparse, 0.9)
    sdp.run()
    assert sdp.policy
@@ -9,18 +9,45 @@ import numpy as np
import mdptoolbox
from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
from utils import P_forest_sparse, R_forest_sparse
def test_ValueIterationGS_small():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_small, R_small, 0.9)
    sdp.run()
    p = (1, 0)
    itr = 28  # from Octave MDPtoolbox
    v = np.matrix('42.27744026138212, 35.89524504047155')
    assert sdp.iter == itr
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
def test_ValueIterationGS_small_sparse():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_sparse, R_small, 0.9)
    sdp.run()
    p = (1, 0)
    itr = 28  # from Octave MDPtoolbox
    v = np.matrix('42.27744026138212, 35.89524504047155')
    assert sdp.iter == itr
    assert sdp.policy == p
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
def test_ValueIterationGS_forest():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_forest, R_forest, 0.96)
    sdp.run()
    p = (0, 0, 0)
    v = np.matrix('69.98910821400665, 73.46560194552877, 77.46560194552877')
    itr = 63  # from Octave MDPtoolbox
    assert sdp.max_iter == 63
    assert sdp.policy == p
    assert sdp.iter == itr
    assert (np.absolute(np.array(sdp.V) - v) < SMALLNUM).all()
def test_ValueIterationGS_forest_sparse():
    sdp = mdptoolbox.mdp.ValueIterationGS(P_forest_sparse, R_forest_sparse,
                                          0.96)
    sdp.run()
    p = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    itr = 16  # from Octave MDPtoolbox
    assert sdp.policy == p
    assert sdp.iter == itr
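ValueIterationGS differs from plain value iteration by sweeping states in place, so later states in a sweep already see the values refreshed earlier in the same sweep; that is why its iteration counts differ from ValueIteration on the same problems. A rough sketch of one sweep, assuming dense P (A, S, S) and R (S, A):

def gauss_seidel_sweep(P, R, V, discount):
    A, S = P.shape[0], P.shape[1]
    for s in range(S):
        # V is reused immediately, unlike the synchronous Jacobi-style update.
        V[s] = max(R[s, a] + discount * P[a, s].dot(V) for a in range(A))
    return V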