Commit f836283a authored by Steven Cordwell
Browse files

work on unit tests, increase version number in setup.py

parent c9e04816
......@@ -841,16 +841,16 @@ class PolicyIteration(MDP):
epsilon-optimum value function found or maximum number of iterations reached.
"""
if V0 == 0:
V0 = zeros(self.S,1)
Vpolicy = zeros(self.S, 1)
else:
raise NotImplementedError("evalPolicyIterative: case V0 != 0 not implemented. Use V0=0 instead.")
Ppolicy, PRpolicy = self.computePpolicyPRpolicy(P, R, policy)
Ppolicy, PRpolicy = self.computePpolicyPRpolicy(self.P, self.R, self.policy)
if self.verbose:
print(' Iteration V_variation')
itr = 0
Vpolicy = V0
done = False
while not done:
itr = itr + 1
......@@ -892,9 +892,9 @@ class PolicyIteration(MDP):
"""
from numpy.linalg import solve as lin_eq
Ppolicy, PRpolicy = self.computePpolicyPRpolicy(P, R, policy)
Ppolicy, PRpolicy = self.computePpolicyPRpolicy(self.P, self.R, self.policy)
# V = PR + gPV => (I-gP)V = PR => V = inv(I-gP)* PR
self.value = lin_eq((speye(S, S) - discount * Ppolicy) , PRpolicy)
self.value = lin_eq((speye(self.S, self.S) - self.discount * Ppolicy) , PRpolicy)
def iterate(self):
""""""
......@@ -916,12 +916,12 @@ class PolicyIteration(MDP):
self.bellmanOperator()
n_different = (policy != policy_prev).sum()
n_different = (self.policy != policy_prev).sum()
if self.verbose:
print(' %s %s') % (self.iter, n_different)
if (policy == policy_prev).all() or (self.iter == self.max_iter):
if (self.policy == policy_prev).all() or (self.iter == self.max_iter):
done = True
self.time = time() - self.time
......
......@@ -3,7 +3,7 @@
from distutils.core import setup
setup(name="PyMDPtoolbox",
version="0.1",
version="0.7",
description="Python Markov Decision Problem Toolbox",
author="Steven Cordwell",
author_email="steven.cordwell@uqconnect.edu.au",
......
......@@ -175,7 +175,7 @@ def test_MDP_P_R_1():
P1[0] = matrix([[0.5, 0.5],[0.8, 0.2]])
P1[1] = matrix([[0, 1],[0.1, 0.9]])
R1 = matrix([[5, 10], [-1, 2]])
a = MDP(P, R, 0.9)
a = MDP(P, R, 0.9, 0.01)
assert a.P.dtype == P1.dtype
assert a.R.dtype == R1.dtype
for kk in range(2):
......@@ -188,7 +188,7 @@ def test_MDP_P_R_2():
P1[0] = matrix([[0.5, 0.5],[0.8, 0.2]])
P1[1] = matrix([[0, 1],[0.1, 0.9]])
R1 = matrix([[7.5, 2], [-0.4, 3.9]])
a = MDP(P, R, 0.9)
a = MDP(P, R, 0.9, 0.01)
assert a.P.dtype == P1.dtype
assert a.R.dtype == R1.dtype
for kk in range(2):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment