Commit 77dce9f8 by Steven Cordwell

### Class LP is now fixed; it requires the cvxopt module to be installed

parent 3eb541ec
 ... @@ -684,13 +684,14 @@ class LP(MDP): ... @@ -684,13 +684,14 @@ class LP(MDP): try: try: from cvxopt import matrix, solvers from cvxopt import matrix, solvers self.linprog = solvers.lp self.linprog = solvers.lp self.cvxmat = matrix except ImportError: except ImportError: raise ImportError("The python module cvxopt is required to use " \ raise ImportError("The python module cvxopt is required to use " \ "linear programming functionality.") "linear programming functionality.") from scipy.sparse import eye as speye from scipy.sparse import eye as speye MDP.__init__(self, transitions, reward, discount, None) MDP.__init__(self, transitions, reward, discount, None, None) # The objective is to resolve : min V / V >= PR + discount*P*V # The objective is to resolve : min V / V >= PR + discount*P*V # The function linprog of the optimisation Toolbox of Mathworks resolves : # The function linprog of the optimisation Toolbox of Mathworks resolves : ... @@ -698,24 +699,34 @@ class LP(MDP): ... @@ -698,24 +699,34 @@ class LP(MDP): # So the objective could be expressed as : min V / (discount*P-I) * V <= - PR # So the objective could be expressed as : min V / (discount*P-I) * V <= - PR # To avoid loop on states, the matrix M is structured following actions M(A*S,S) # To avoid loop on states, the matrix M is structured following actions M(A*S,S) self.f = ones(self.S, 1) self.f = self.cvxmat(ones((self.S, 1))) self.M = zeros((self.A * self.S, self.S)) self.M = zeros((self.A * self.S, self.S)) for aa in range(self.A): for aa in range(self.A): pos = (aa + 1) * self.S pos = (aa + 1) * self.S self.M[(pos - self.S):pos, :] = discount * self.P[aa] - speye(self.S, self.S) self.M[(pos - self.S):pos, :] = discount * self.P[aa] - speye(self.S, self.S) self.M = matrix(self.M) self.M = self.cvxmat(self.M) def iterate(self): def iterate(self): """""" """""" self.time = time() self.time = time() self.V = self.linprog(self.f, self.M, -self.R) h = self.cvxmat(self.R.reshape(self.S * self.A, 1, 
order="F"), tc='d') self.V, self.policy = self.bellmanOperator(self.P, self.R, self.discount, self.V) # Using the glpk option will make this behave more like Octave # (Octave uses glpk) and perhaps Matlab. If solver=None (ie using the # default cvxopt solver) then V agrees with the Octave equivalent # only to 10e-8 places. self.V = matrix(self.linprog(self.f, self.M, -h, solver='glpk')['x']) self.policy, self.V = self.bellmanOperator() self.time = time() - self.time self.time = time() - self.time # store value and policy as tuples self.V = tuple(self.V.getA1().tolist()) self.policy = tuple(self.policy.getA1().tolist()) class PolicyIteration(MDP): class PolicyIteration(MDP): """Resolution of discounted MDP with policy iteration algorithm. """Resolution of discounted MDP with policy iteration algorithm. ... ...
 ... @@ -5,8 +5,8 @@ Created on Sun May 27 23:16:57 2012 ... @@ -5,8 +5,8 @@ Created on Sun May 27 23:16:57 2012 @author: - @author: - """ """ from mdp import check, checkSquareStochastic, exampleForest, exampleRand, MDP from mdp import check, checkSquareStochastic, exampleForest, exampleRand, LP from mdp import PolicyIteration, QLearning, RelativeValueIteration from mdp import MDP, PolicyIteration, QLearning, RelativeValueIteration from mdp import ValueIteration, ValueIterationGS from mdp import ValueIteration, ValueIterationGS from numpy import absolute, array, eye, matrix, zeros from numpy import absolute, array, eye, matrix, zeros ... @@ -212,6 +212,16 @@ def test_MDP_P_R_3(): ... @@ -212,6 +212,16 @@ def test_MDP_P_R_3(): a = MDP(P, R, 0.9, 0.01, 1) a = MDP(P, R, 0.9, 0.01, 1) assert (absolute(a.R - PR) < SMALLNUM).all() assert (absolute(a.R - PR) < SMALLNUM).all() # LP def test_LP(): a = LP(P, R, 0.9) v = matrix('42.4418604651163 36.0465116279070') p = matrix('1 0') a.iterate() assert (array(a.policy) == p).all() assert (absolute(array(a.V) - v) < SMALLNUM).all() # PolicyIteration # PolicyIteration def test_PolicyIteration_init_policy0(): def test_PolicyIteration_init_policy0(): ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!