Commit 4798ff01 authored by Steven Cordwell's avatar Steven Cordwell
Browse files

use the mdp module namespace rather than importing individual pieces from mdp

parent a1cf5c8d
......@@ -18,9 +18,7 @@ from scipy.sparse import eye as speye
from scipy.sparse import csr_matrix as sparse
#from scipy.stats.distributions import poisson
from mdp import check, checkSquareStochastic, exampleForest, exampleRand
from mdp import MDP, PolicyIteration, QLearning, RelativeValueIteration
from mdp import ValueIteration, ValueIterationGS
import mdp
STATES = 10
ACTIONS = 3
......@@ -32,9 +30,9 @@ R = array([[5, 10], [-1, 2]])
Ps = empty(2, dtype=object)
Ps[0] = sparse([[0.5, 0.5],[0.8, 0.2]])
Ps[1] = sparse([[0, 1],[0.1, 0.9]])
Pf, Rf = exampleForest()
Pr, Rr = exampleRand(STATES, ACTIONS)
Prs, Rrs = exampleRand(STATES, ACTIONS, is_sparse=True)
Pf, Rf = mdp.exampleForest()
Pr, Rr = mdp.exampleRand(STATES, ACTIONS)
Prs, Rrs = mdp.exampleRand(STATES, ACTIONS, is_sparse=True)
# check: square, stochastic and non-negative ndarrays
......@@ -44,14 +42,14 @@ def test_check_square_stochastic_nonnegative_array_1():
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[:, a] = rand(STATES)
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_square_stochastic_nonnegative_array_2():
P = zeros((ACTIONS, STATES, STATES))
R = rand(ACTIONS, STATES, STATES)
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
# check: P - square, stochastic and non-negative object arrays
......@@ -60,21 +58,21 @@ def test_check_P_square_stochastic_nonnegative_object_array():
R = rand(STATES, ACTIONS)
for a in range(ACTIONS):
P[a] = eye(STATES)
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_object_matrix():
P = empty(ACTIONS, dtype=object)
R = rand(STATES, ACTIONS)
for a in range(ACTIONS):
P[a] = matrix(eye(STATES))
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_object_sparse():
P = empty(ACTIONS, dtype=object)
R = rand(STATES, ACTIONS)
for a in range(ACTIONS):
P[a] = speye(STATES, STATES).tocsr()
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
# check: P - square, stochastic and non-negative lists
......@@ -83,21 +81,21 @@ def test_check_P_square_stochastic_nonnegative_list_array():
R = rand(STATES, ACTIONS)
for a in xrange(ACTIONS):
P.append(eye(STATES))
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_list_matrix():
P = []
R = rand(STATES, ACTIONS)
for a in xrange(ACTIONS):
P.append(matrix(eye(STATES)))
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_list_sparse():
P = []
R = rand(STATES, ACTIONS)
for a in xrange(ACTIONS):
P.append(speye(STATES, STATES).tocsr())
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
# check: P - square, stochastic and non-negative dicts
......@@ -106,21 +104,21 @@ def test_check_P_square_stochastic_nonnegative_dict_array():
R = rand(STATES, ACTIONS)
for a in xrange(ACTIONS):
P[a] = eye(STATES)
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_dict_matrix():
P = {}
R = rand(STATES, ACTIONS)
for a in xrange(ACTIONS):
P[a] = matrix(eye(STATES))
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_dict_sparse():
P = {}
R = rand(STATES, ACTIONS)
for a in xrange(ACTIONS):
P[a] = speye(STATES, STATES).tocsr()
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
# check: R - square stochastic and non-negative sparse
......@@ -129,7 +127,7 @@ def test_check_R_square_stochastic_nonnegative_sparse():
R = sparse(rand(STATES, ACTIONS))
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
# check: R - square, stochastic and non-negative object arrays
......@@ -139,7 +137,7 @@ def test_check_R_square_stochastic_nonnegative_object_array():
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[a] = rand(STATES, STATES)
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_R_square_stochastic_nonnegative_object_matrix():
P = zeros((ACTIONS, STATES, STATES))
......@@ -147,7 +145,7 @@ def test_check_R_square_stochastic_nonnegative_object_matrix():
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[a] = matrix(rand(STATES, STATES))
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
def test_check_R_square_stochastic_nonnegative_object_sparse():
P = zeros((ACTIONS, STATES, STATES))
......@@ -155,7 +153,7 @@ def test_check_R_square_stochastic_nonnegative_object_sparse():
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[a] = sparse(rand(STATES, STATES))
assert (check(P, R) == None)
assert (mdp.check(P, R) == None)
# checkSquareStochastic: square, stochastic and non-negative
......@@ -163,35 +161,35 @@ def test_checkSquareStochastic_square_stochastic_nonnegative_array():
P = rand(STATES, STATES)
for s in range(STATES):
P[s, :] = P[s, :] / P[s, :].sum()
assert checkSquareStochastic(P) == None
assert mdp.checkSquareStochastic(P) == None
def test_checkSquareStochastic_square_stochastic_nonnegative_matrix():
P = rand(STATES, STATES)
for s in range(STATES):
P[s, :] = P[s, :] / P[s, :].sum()
P = matrix(P)
assert checkSquareStochastic(P) == None
assert mdp.checkSquareStochastic(P) == None
def test_checkSquareStochastic_square_stochastic_nonnegative_sparse():
P = rand(STATES, STATES)
for s in range(STATES):
P[s, :] = P[s, :] / P[s, :].sum()
P = sparse(P)
assert checkSquareStochastic(P) == None
assert mdp.checkSquareStochastic(P) == None
# checkSquareStochastic: eye
def test_checkSquareStochastic_eye_array():
P = eye(STATES)
assert checkSquareStochastic(P) == None
assert mdp.checkSquareStochastic(P) == None
def test_checkSquareStochastic_eye_matrix():
P = matrix(eye(STATES))
assert checkSquareStochastic(P) == None
assert mdp.checkSquareStochastic(P) == None
def test_checkSquareStochastic_eye_sparse():
P = speye(STATES, STATES).tocsr()
assert checkSquareStochastic(P) == None
assert mdp.checkSquareStochastic(P) == None
# exampleForest
......@@ -209,8 +207,8 @@ def test_exampleForest_R_shape():
[4, 2]])).all()
def test_exampleForest_check():
P, R = exampleForest(10, 5, 3, 0.2)
assert check(P, R) == None
P, R = mdp.exampleForest(10, 5, 3, 0.2)
assert mdp.check(P, R) == None
# exampleRand
......@@ -221,7 +219,7 @@ def test_exampleRand_dense_R_shape():
assert (Rr.shape == (ACTIONS, STATES, STATES))
def test_exampleRand_dense_check():
assert check(Pr, Rr) == None
assert mdp.check(Pr, Rr) == None
def test_exampleRand_sparse_P_shape():
assert (len(Prs) == ACTIONS)
......@@ -230,7 +228,7 @@ def test_exampleRand_sparse_R_shape():
assert (len(Rrs) == ACTIONS)
def test_exampleRand_sparse_check():
assert check(Prs, Rrs) == None
assert mdp.check(Prs, Rrs) == None
# MDP
......@@ -243,7 +241,7 @@ def test_MDP_P_R_1():
R1.append(matrix('5; -1'))
R1.append(matrix('10; 2'))
R1 = tuple(R1)
a = MDP(P, R, 0.9, 0.01, 1)
a = mdp.MDP(P, R, 0.9, 0.01, 1)
assert type(a.P) == type(P1)
assert type(a.R) == type(R1)
for kk in range(2):
......@@ -260,7 +258,7 @@ def test_MDP_P_R_2():
R1.append(matrix('7.5; -0.4'))
R1.append(matrix('2; 3.9'))
R1 = tuple(R1)
a = MDP(P, R, 0.9, 0.01, 1)
a = mdp.MDP(P, R, 0.9, 0.01, 1)
assert type(a.P) == type(P1)
assert type(a.R) == type(R1)
for kk in range(2):
......@@ -274,7 +272,7 @@ def test_MDP_P_R_3():
PR.append(matrix('0.12591304; 0.1871'))
PR.append(matrix('0.20935652;0.2898'))
PR = tuple(PR)
a = MDP(P, R, 0.9, 0.01, 1)
a = mdp.MDP(P, R, 0.9, 0.01, 1)
for kk in range(2):
assert (absolute(a.R[kk] - PR[kk]) < SMALLNUM).all()
......@@ -291,17 +289,17 @@ def test_MDP_P_R_3():
# PolicyIteration
def test_PolicyIteration_init_policy0():
a = PolicyIteration(P, R, 0.9)
a = mdp.PolicyIteration(P, R, 0.9)
p = matrix('1; 1')
assert (a.policy == p).all()
def test_PolicyIteration_init_policy0_exampleForest():
a = PolicyIteration(Pf, Rf, 0.9)
a = mdp.PolicyIteration(Pf, Rf, 0.9)
p = matrix('0; 1; 0')
assert (a.policy == p).all()
def test_PolicyIteration_computePpolicyPRpolicy_exampleForest():
a = PolicyIteration(Pf, Rf, 0.9)
a = mdp.PolicyIteration(Pf, Rf, 0.9)
P1 = matrix('0.1 0.9 0; 1 0 0; 0.1 0 0.9')
R1 = matrix('0; 1; 4')
Ppolicy, Rpolicy = a._computePpolicyPRpolicy()
......@@ -312,7 +310,7 @@ def test_PolicyIteration_evalPolicyIterative_exampleForest():
v0 = matrix('0; 0; 0')
v1 = matrix('4.47504640074458; 5.02753258879703; 23.17234211944304')
p = matrix('0; 1; 0')
a = PolicyIteration(Pf, Rf, 0.9)
a = mdp.PolicyIteration(Pf, Rf, 0.9)
assert (absolute(a.V - v0) < SMALLNUM).all()
a._evalPolicyIterative()
assert (absolute(a.V - v1) < SMALLNUM).all()
......@@ -321,14 +319,14 @@ def test_PolicyIteration_evalPolicyIterative_exampleForest():
def test_PolicyIteration_evalPolicyIterative_bellmanOperator_exampleForest():
v = matrix('4.47504640074458; 5.02753258879703; 23.17234211944304')
p = matrix('0; 0; 0')
a = PolicyIteration(Pf, Rf, 0.9)
a = mdp.PolicyIteration(Pf, Rf, 0.9)
a._evalPolicyIterative()
policy, value = a._bellmanOperator()
assert (policy == p).all()
assert (absolute(a.V - v) < SMALLNUM).all()
def test_PolicyIteration_iterative_exampleForest():
a = PolicyIteration(Pf, Rf, 0.9, eval_type=1)
a = mdp.PolicyIteration(Pf, Rf, 0.9, eval_type=1)
v = matrix('26.2439058351861 29.4839058351861 33.4839058351861')
p = matrix('0 0 0')
itr = 2
......@@ -339,12 +337,12 @@ def test_PolicyIteration_iterative_exampleForest():
def test_PolicyIteration_evalPolicyMatrix_exampleForest():
v_pol = matrix('4.47513812154696; 5.02762430939227; 23.17243384704857')
a = PolicyIteration(Pf, Rf, 0.9)
a = mdp.PolicyIteration(Pf, Rf, 0.9)
a._evalPolicyMatrix()
assert (absolute(a.V - v_pol) < SMALLNUM).all()
def test_PolicyIteration_matrix_exampleForest():
a = PolicyIteration(Pf, Rf, 0.9)
a = mdp.PolicyIteration(Pf, Rf, 0.9)
v = matrix('26.2440000000000 29.4840000000000 33.4840000000000')
p = matrix('0 0 0')
itr = 2
......@@ -357,7 +355,7 @@ def test_PolicyIteration_matrix_exampleForest():
def test_QLearning():
randseed(0)
a = QLearning(P, R, 0.9)
a = mdp.QLearning(P, R, 0.9)
q = matrix('36.63245946346517 42.24434307022128; ' \
'35.96582807367007 32.70456417451635')
v = matrix('42.24434307022128 35.96582807367007')
......@@ -368,7 +366,7 @@ def test_QLearning():
assert (array(a.policy) == p).all()
def test_QLearning_exampleForest():
a = QLearning(Pf, Rf, 0.9)
a = mdp.QLearning(Pf, Rf, 0.9)
#q = matrix('26.1841860892231 18.6273657021260; ' \
# '29.5880960371007 18.5901207622881; '\
# '33.3526406657418 25.2621054631519')
......@@ -382,7 +380,7 @@ def test_QLearning_exampleForest():
# RelativeValueIteration
def test_RelativeValueIteration_dense():
a = RelativeValueIteration(P, R)
a = mdp.RelativeValueIteration(P, R)
p= matrix('1 0')
ar = 3.88523524641183
itr = 29
......@@ -392,7 +390,7 @@ def test_RelativeValueIteration_dense():
assert absolute(a.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_sparse():
a = RelativeValueIteration(Ps, R)
a = mdp.RelativeValueIteration(Ps, R)
p= matrix('1 0')
ar = 3.88523524641183
itr = 29
......@@ -402,7 +400,7 @@ def test_RelativeValueIteration_sparse():
assert absolute(a.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_exampleForest():
a = RelativeValueIteration(Pf, Rf)
a = mdp.RelativeValueIteration(Pf, Rf)
itr = 4
p = matrix('0 0 0')
#v = matrix('-4.360000000000000 -0.760000000000000 3.240000000000000')
......@@ -416,18 +414,18 @@ def test_RelativeValueIteration_exampleForest():
# ValueIteration
def test_ValueIteration_boundIter():
inst = ValueIteration(P, R, 0.9, 0.01)
inst = mdp.ValueIteration(P, R, 0.9, 0.01)
assert (inst.max_iter == 28)
def test_ValueIteration_iterate():
inst = ValueIteration(P, R, 0.9, 0.01)
inst = mdp.ValueIteration(P, R, 0.9, 0.01)
inst.iterate()
assert (inst.V == (40.048625392716822, 33.65371175967546))
assert (inst.policy == (1, 0))
assert (inst.iter == 26)
def test_ValueIteration_exampleForest():
a = ValueIteration(Pf, Rf, 0.96)
a = mdp.ValueIteration(Pf, Rf, 0.96)
a.iterate()
assert (a.policy == array([0, 0, 0])).all()
assert a.iter == 4
......@@ -435,12 +433,12 @@ def test_ValueIteration_exampleForest():
# ValueIterationGS
def test_ValueIterationGS_boundIter_exampleForest():
a = ValueIterationGS(Pf, Rf, 0.9)
a = mdp.ValueIterationGS(Pf, Rf, 0.9)
itr = 39
assert (a.max_iter == itr)
def test_ValueIterationGS_exampleForest():
a = ValueIterationGS(Pf, Rf, 0.9)
a = mdp.ValueIterationGS(Pf, Rf, 0.9)
p = matrix('0 0 0')
v = matrix('25.5833879767579 28.8306546355469 32.8306546355469')
itr = 33
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment