Commit f836283a by Steven Cordwell

### work on unit tests, increase version number in setup.py

parent c9e04816
 ... ... @@ -841,16 +841,16 @@ class PolicyIteration(MDP): epsilon-optimum value function found or maximum number of iterations reached. """ if V0 == 0: V0 = zeros(self.S,1) Vpolicy = zeros(self.S, 1) else: raise NotImplementedError("evalPolicyIterative: case V0 != 0 not implemented. Use V0=0 instead.") Ppolicy, PRpolicy = self.computePpolicyPRpolicy(P, R, policy) Ppolicy, PRpolicy = self.computePpolicyPRpolicy(self.P, self.R, self.policy) if self.verbose: print(' Iteration V_variation') itr = 0 Vpolicy = V0 done = False while not done: itr = itr + 1 ... ... @@ -892,9 +892,9 @@ class PolicyIteration(MDP): """ from numpy.linalg import solve as lin_eq Ppolicy, PRpolicy = self.computePpolicyPRpolicy(P, R, policy) Ppolicy, PRpolicy = self.computePpolicyPRpolicy(self.P, self.R, self.policy) # V = PR + gPV => (I-gP)V = PR => V = inv(I-gP)* PR self.value = lin_eq((speye(S, S) - discount * Ppolicy) , PRpolicy) self.value = lin_eq((speye(self.S, self.S) - self.discount * Ppolicy) , PRpolicy) def iterate(self): """""" ... ... @@ -916,12 +916,12 @@ class PolicyIteration(MDP): self.bellmanOperator() n_different = (policy != policy_prev).sum() n_different = (self.policy != policy_prev).sum() if self.verbose: print(' %s %s') % (self.iter, n_different) if (policy == policy_prev).all() or (self.iter == self.max_iter): if (self.policy == policy_prev).all() or (self.iter == self.max_iter): done = True self.time = time() - self.time ... ...
 ... ... @@ -3,7 +3,7 @@ from distutils.core import setup setup(name="PyMDPtoolbox", version="0.1", version="0.7", description="Python Markov Decision Problem Toolbox", author="Steven Cordwell", author_email="steven.cordwell@uqconnect.edu.au", ... ...
 ... ... @@ -175,7 +175,7 @@ def test_MDP_P_R_1(): P1[0] = matrix([[0.5, 0.5],[0.8, 0.2]]) P1[1] = matrix([[0, 1],[0.1, 0.9]]) R1 = matrix([[5, 10], [-1, 2]]) a = MDP(P, R, 0.9) a = MDP(P, R, 0.9, 0.01) assert a.P.dtype == P1.dtype assert a.R.dtype == R1.dtype for kk in range(2): ... ... @@ -188,7 +188,7 @@ def test_MDP_P_R_2(): P1[0] = matrix([[0.5, 0.5],[0.8, 0.2]]) P1[1] = matrix([[0, 1],[0.1, 0.9]]) R1 = matrix([[7.5, 2], [-0.4, 3.9]]) a = MDP(P, R, 0.9) a = MDP(P, R, 0.9, 0.01) assert a.P.dtype == P1.dtype assert a.R.dtype == R1.dtype for kk in range(2): ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!