Commit 66995d31 authored by Steven Cordwell

[test] Test cases based on issue #7

Finite horizon tests are commented out until the horizon can be passed.
parent 3bf4f005
...@@ -6,11 +6,90 @@ Created on Sat Aug 24 14:18:51 2013 ...@@ -6,11 +6,90 @@ Created on Sat Aug 24 14:18:51 2013
""" """
import numpy as np import numpy as np
import scipy.sparse as sp
import mdptoolbox, mdptoolbox.example import mdptoolbox, mdptoolbox.example
from .utils import SMALLNUM, P_small, R_small from .utils import SMALLNUM, P_small, R_small
class TestMDP(object):
    """Exercise ``mdptoolbox.mdp.MDP`` construction with several P/R layouts.

    Each test builds an ``MDP`` from the same two-action, five-state problem
    and asserts the sizes (``S``, ``A``) and the reward vectors (``R``) found
    on the resulting object.
    """

    # Two 5x5 matrices whose rows each sum to 1 (presumably indexed
    # [action][state][next state] -- confirm against the mdptoolbox docs).
    P = (((0.0, 0.0, 0.6, 0.4, 0.0),
          (0.0, 0.0, 0.0, 0.0, 1.0),
          (0.0, 0.0, 1.0, 0.0, 0.0),
          (0.0, 0.0, 0.0, 1.0, 0.0),
          (0.0, 0.0, 0.0, 0.0, 1.0)),
         ((0.0, 0.4, 0.0, 0.0, 0.6),
          (0.0, 1.0, 0.0, 0.0, 0.0),
          (0.0, 0.0, 0.0, 0.0, 1.0),
          (0.0, 0.0, 0.0, 0.0, 1.0),
          (0.0, 0.0, 0.0, 0.0, 1.0)))
    # Same layout as P; the only non-zero reward is R[1][0][4].
    R = (((0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0)),
         ((0, 0, 0, 0, 1),
          (0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0),
          (0, 0, 0, 0, 0)))
    # Reward vectors the tests expect on the MDP object, one per action.
    # The single non-zero value is P[1][0][4] * R[1][0][4] = 0.6 * 1.
    computed_R = ((0.0, 0.0, 0.0, 0.0, 0.0),
                  (0.6, 0.0, 0.0, 0.0, 0.0))

    def _assert_sizes(self, sdp):
        """Assert that *sdp* reports five states and two actions."""
        assert sdp.S == 5
        assert sdp.A == 2

    def _assert_computed_R(self, sdp):
        """Assert that ``sdp.R`` matches ``computed_R`` for every action."""
        for action, expected in enumerate(self.computed_R):
            assert (sdp.R[action] == np.array(expected)).all()

    def test_a(self):
        """P and R both given as three-dimensional numpy arrays."""
        P = np.array(self.P)
        R = np.array(self.R)
        sdp = mdptoolbox.mdp.MDP(P, R, None, None, None)
        self._assert_sizes(sdp)
        self._assert_computed_R(sdp)

    def test_b(self):
        """R given as a two-dimensional array with one column per action."""
        P = np.array(self.P)
        R = np.array(self.computed_R).T
        sdp = mdptoolbox.mdp.MDP(P, R, None, None, None)
        self._assert_sizes(sdp)
        assert (sdp.R[0] == R[:, 0]).all()
        assert (sdp.R[1] == R[:, 1]).all()

    def test_c(self):
        """R given as a single vector with one entry per state."""
        P = np.array(self.P)
        R = (0, 1, 0, 1, 0)
        sdp = mdptoolbox.mdp.MDP(P, R, None, None, None)
        self._assert_sizes(sdp)
        assert (sdp.R[0] == np.array(R)).all()
        assert (sdp.R[1] == np.array(R)).all()
        # Both actions must share one reward object. ``is`` keeps both
        # operands alive while comparing, unlike comparing two id() values.
        assert sdp.R[0] is sdp.R[1]

    def test_d(self):
        """P and R given as lists of scipy CSR matrices."""
        P = [None] * 2
        P[0] = sp.csr_matrix(np.array(self.P[0]))
        P[1] = sp.csr_matrix(np.array(self.P[1]))
        R = [None] * 2
        R[0] = sp.csr_matrix(np.array(self.R[0]))
        R[1] = sp.csr_matrix(np.array(self.R[1]))
        sdp = mdptoolbox.mdp.MDP(P, R, None, None, None)
        self._assert_sizes(sdp)
        self._assert_computed_R(sdp)

    def test_e(self):
        """P and R given as object-dtype arrays holding one matrix per action."""
        P = np.empty(shape=2, dtype=object)
        P[0] = np.array(self.P[0])
        P[1] = np.array(self.P[1])
        R = np.empty(shape=2, dtype=object)
        R[0] = np.array(self.R[0])
        R[1] = np.array(self.R[1])
        sdp = mdptoolbox.mdp.MDP(P, R, None, None, None)
        self._assert_sizes(sdp)
        self._assert_computed_R(sdp)
def test_MDP_P_R_1(): def test_MDP_P_R_1():
P1 = [] P1 = []
P1.append(np.array(np.matrix('0.5 0.5; 0.8 0.2'))) P1.append(np.array(np.matrix('0.5 0.5; 0.8 0.2')))
...@@ -20,7 +99,7 @@ def test_MDP_P_R_1(): ...@@ -20,7 +99,7 @@ def test_MDP_P_R_1():
R1.append(np.array(np.matrix('5, -1'))) R1.append(np.array(np.matrix('5, -1')))
R1.append(np.array(np.matrix('10, 2'))) R1.append(np.array(np.matrix('10, 2')))
R1 = tuple(R1) R1 = tuple(R1)
a = mdptoolbox.mdp.MDP(P_small, R_small, 0.9, 0.01, 1) a = mdptoolbox.mdp.MDP(P_small, R_small, None, None, None)
assert type(a.P) == type(P1) assert type(a.P) == type(P1)
assert type(a.R) == type(R1) assert type(a.R) == type(R1)
for kk in range(2): for kk in range(2):
...@@ -37,7 +116,7 @@ def test_MDP_P_R_2(): ...@@ -37,7 +116,7 @@ def test_MDP_P_R_2():
R1.append(np.array(np.matrix('7.5, -0.4'))) R1.append(np.array(np.matrix('7.5, -0.4')))
R1.append(np.array(np.matrix('2, 3.9'))) R1.append(np.array(np.matrix('2, 3.9')))
R1 = tuple(R1) R1 = tuple(R1)
a = mdptoolbox.mdp.MDP(P_small, R, 0.9, 0.01, 1) a = mdptoolbox.mdp.MDP(P_small, R, None, None, None)
assert type(a.P) == type(P1) assert type(a.P) == type(P1)
assert type(a.R) == type(R1) assert type(a.R) == type(R1)
for kk in range(2): for kk in range(2):
...@@ -51,6 +130,6 @@ def test_MDP_P_R_3(): ...@@ -51,6 +130,6 @@ def test_MDP_P_R_3():
PR.append(np.array(np.matrix('0.12591304, 0.1871'))) PR.append(np.array(np.matrix('0.12591304, 0.1871')))
PR.append(np.array(np.matrix('0.20935652,0.2898'))) PR.append(np.array(np.matrix('0.20935652,0.2898')))
PR = tuple(PR) PR = tuple(PR)
a = mdptoolbox.mdp.MDP(P, R, 0.9, 0.01, 1) a = mdptoolbox.mdp.MDP(P, R, None, None, None)
for kk in range(2): for kk in range(2):
assert (np.absolute(a.R[kk] - PR[kk]) < SMALLNUM).all() assert (np.absolute(a.R[kk] - PR[kk]) < SMALLNUM).all()
# -*- coding: utf-8 -*-
import numpy as np
import scipy.sparse as sp
import mdptoolbox
class BaseTestIssue7(object):
    """Shared fixture data and checks for the issue #7 dense/sparse tests.

    Subclasses call one of the four public helpers with an algorithm class.
    Each helper builds the algorithm from dense or sparse versions of ``P``
    and ``R``, checks what is stored on the object, runs it, and compares
    the resulting policy with ``policy``. The reward and policy comparisons
    are skipped when the algorithm is ``mdptoolbox.mdp.QLearning``.
    """

    discount = 0.9

    # One 9x9 matrix per action; every row sums to 1.
    P = [None] * 2
    P[0] = np.array([
        [0., 0., 0., 0.64, 0., 0., 0.36, 0., 0.],
        [0., 0., 0., 0., 0.93, 0., 0., 0.07, 0.],
        [0., 0., 0., 0., 0., 0.2, 0., 0., 0.8],
        [0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1.]
    ])
    P[1] = np.array([
        [0., 0., 0.4, 0., 0.6, 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.87, 0.13, 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.11, 0.89],
        [0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1.]
    ])

    # Same layout as P; only R[1][0][4] and R[1][1][4] are non-zero.
    R = [None] * 2
    R[0] = np.zeros((9, 9))
    R[1] = np.array([
        [0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.]
    ])

    # Reward vectors expected on the algorithm object, one per action.
    # They equal the row sums of the element-wise product P[a] * R[a].
    computed_R = (np.array((0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)),
                  np.array((0.6, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))

    # Policy the algorithms are expected to produce after run().
    policy = (1, 1, 0, 0, 0, 0, 0, 0, 0)

    def _check(self, algorithm, P, R, sparse_P, check_R_dense):
        """Build *algorithm* from P and R, then verify storage and policy.

        algorithm     -- class called as ``algorithm(P, R, discount)``
        P, R          -- per-action matrices, dense or sparse
        sparse_P      -- whether the stored ``sdp.P`` must be sparse
        check_R_dense -- whether to assert that the stored ``sdp.R`` is dense
        """
        sdp = algorithm(P, R, self.discount)
        # Classes are compared by identity; QLearning is exempt from the
        # reward and policy comparisons below.
        compare_results = algorithm is not mdptoolbox.mdp.QLearning
        if compare_results:
            for action, expected in enumerate(self.computed_R):
                assert (sdp.R[action] == expected).all()
        for action in range(2):
            assert sp.issparse(sdp.P[action]) == sparse_P
        if check_R_dense:
            for action in range(2):
                assert not sp.issparse(sdp.R[action])
        sdp.run()
        if compare_results:
            assert sdp.policy == self.policy, sdp.policy

    def dense_P_dense_R(self, algorithm):
        """Check *algorithm* with dense P and dense R."""
        self._check(algorithm, self.P, self.R,
                    sparse_P=False, check_R_dense=True)

    def sparse_P_dense_R(self, algorithm):
        """Check *algorithm* with CSR-sparse P and dense R."""
        P = list(map(sp.csr_matrix, self.P))
        self._check(algorithm, P, self.R,
                    sparse_P=True, check_R_dense=True)

    def dense_P_sparse_R(self, algorithm):
        """Check *algorithm* with dense P and CSR-sparse R.

        The sparsity of the stored ``sdp.R`` is intentionally not asserted.
        """
        R = list(map(sp.csr_matrix, self.R))
        self._check(algorithm, self.P, R,
                    sparse_P=False, check_R_dense=False)

    def sparse_P_sparse_R(self, algorithm):
        """Check *algorithm* with CSR-sparse P and CSR-sparse R.

        The sparsity of the stored ``sdp.R`` is intentionally not asserted.
        """
        P = list(map(sp.csr_matrix, self.P))
        R = list(map(sp.csr_matrix, self.R))
        self._check(algorithm, P, R,
                    sparse_P=True, check_R_dense=False)
# Disabled: these tests cannot be used until the helpers can pass a horizon.
#class TestFiniteHorizon(BaseTestIssue7):
#
# def test_dense_P_dense_R(self):
# self.dense_P_dense_R(mdptoolbox.mdp.FiniteHorizon)
#
# def test_sparse_P_dense_R(self):
# self.sparse_P_dense_R(mdptoolbox.mdp.FiniteHorizon)
#
# def test_dense_P_sparse_R(self):
# self.dense_P_sparse_R(mdptoolbox.mdp.FiniteHorizon)
#
# def test_sparse_P_sparse_R(self):
# self.sparse_P_sparse_R(mdptoolbox.mdp.FiniteHorizon)
class TestLP(BaseTestIssue7):
    """Run the four dense/sparse input checks against ``mdptoolbox.mdp.LP``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.LP
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.LP
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.LP
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.LP
        self.sparse_P_sparse_R(solver)
class TestPolicyIteration(BaseTestIssue7):
    """Run the four dense/sparse input checks against ``PolicyIteration``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.PolicyIteration
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.PolicyIteration
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.PolicyIteration
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.PolicyIteration
        self.sparse_P_sparse_R(solver)
class TestPolicyIterationModified(BaseTestIssue7):
    """Run the four dense/sparse checks against ``PolicyIterationModified``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.PolicyIterationModified
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.PolicyIterationModified
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.PolicyIterationModified
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.PolicyIterationModified
        self.sparse_P_sparse_R(solver)
class TestQLearning(BaseTestIssue7):
    """Run the four dense/sparse input checks against ``QLearning``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.QLearning
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.QLearning
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.QLearning
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.QLearning
        self.sparse_P_sparse_R(solver)
class TestValueIteration(BaseTestIssue7):
    """Run the four dense/sparse input checks against ``ValueIteration``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.ValueIteration
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.ValueIteration
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.ValueIteration
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.ValueIteration
        self.sparse_P_sparse_R(solver)
class TestRelativeValueIteration(BaseTestIssue7):
    """Run the four dense/sparse checks against ``RelativeValueIteration``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.RelativeValueIteration
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.RelativeValueIteration
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.RelativeValueIteration
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.RelativeValueIteration
        self.sparse_P_sparse_R(solver)
class TestValueIterationGS(BaseTestIssue7):
    """Run the four dense/sparse input checks against ``ValueIterationGS``."""

    def test_dense_P_dense_R(self):
        solver = mdptoolbox.mdp.ValueIterationGS
        self.dense_P_dense_R(solver)

    def test_sparse_P_dense_R(self):
        solver = mdptoolbox.mdp.ValueIterationGS
        self.sparse_P_dense_R(solver)

    def test_dense_P_sparse_R(self):
        solver = mdptoolbox.mdp.ValueIterationGS
        self.dense_P_sparse_R(solver)

    def test_sparse_P_sparse_R(self):
        solver = mdptoolbox.mdp.ValueIterationGS
        self.sparse_P_sparse_R(solver)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment