Commit 77dce9f8 by Steven Cordwell

### class LP is now fixed; it requires the cvxopt module to be installed

parent 3eb541ec
**mdp.py**

```diff
@@ -684,13 +684,14 @@ class LP(MDP):
         try:
             from cvxopt import matrix, solvers
             self.linprog = solvers.lp
+            self.cvxmat = matrix
         except ImportError:
             raise ImportError("The python module cvxopt is required to use " \
                 "linear programming functionality.")
         from scipy.sparse import eye as speye
-        MDP.__init__(self, transitions, reward, discount, None)
+        MDP.__init__(self, transitions, reward, discount, None, None)
         # The objective is to resolve : min V / V >= PR + discount*P*V
         # The function linprog of the optimisation Toolbox of Mathworks resolves :
@@ -698,24 +699,34 @@ class LP(MDP):
         # So the objective could be expressed as : min V / (discount*P-I) * V <= - PR
         # To avoid loop on states, the matrix M is structured following actions M(A*S,S)
-        self.f = ones(self.S, 1)
+        self.f = self.cvxmat(ones((self.S, 1)))
         self.M = zeros((self.A * self.S, self.S))
         for aa in range(self.A):
             pos = (aa + 1) * self.S
             self.M[(pos - self.S):pos, :] = discount * self.P[aa] - speye(self.S, self.S)
-        self.M = matrix(self.M)
+        self.M = self.cvxmat(self.M)
 
     def iterate(self):
         """"""
         self.time = time()
-        self.V = self.linprog(self.f, self.M, -self.R)
-        self.V, self.policy = self.bellmanOperator(self.P, self.R, self.discount, self.V)
+        h = self.cvxmat(self.R.reshape(self.S * self.A, 1, order="F"), tc='d')
+        # Using the glpk option will make this behave more like Octave
+        # (Octave uses glpk) and perhaps Matlab. If solver=None (ie using the
+        # default cvxopt solver) then V agrees with the Octave equivalent
+        # only to 10e-8 places.
+        self.V = matrix(self.linprog(self.f, self.M, -h, solver='glpk')['x'])
+        self.policy, self.V = self.bellmanOperator()
         self.time = time() - self.time
+        # store value and policy as tuples
+        self.V = tuple(self.V.getA1().tolist())
+        self.policy = tuple(self.policy.getA1().tolist())
 
 class PolicyIteration(MDP):
     """Resolution of discounted MDP with policy iteration algorithm.
```
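The heart of the fix is the call pattern `solvers.lp(f, M, -h)`: stack one `discount*P[a] - I` block per action into `M`, column-stack the rewards into `h` (the `order="F"` reshape keeps each action's rewards aligned with its block), and minimise `sum(V)` subject to `M*V <= -h`. Below is a minimal standalone sketch of that pattern on a hypothetical 2-state, 2-action MDP; all fixture values are made up, and the only real API used is `cvxopt.matrix` and `cvxopt.solvers.lp`.

```python
# Minimal sketch of the LP formulation used by class LP, on a
# hypothetical 2-state, 2-action MDP (fixture values are illustrative).
# cvxopt.solvers.lp solves:  min c'x  subject to  G x <= h
# Here x = V, c = ones, G = stacked (discount*P[a] - I), rhs = -PR.
import numpy as np
from cvxopt import matrix, solvers

S, A, discount = 2, 2, 0.9
P = [np.array([[0.5, 0.5], [0.8, 0.2]]),   # transitions under action 0
     np.array([[0.0, 1.0], [0.1, 0.9]])]   # transitions under action 1
PR = np.array([[5.0, 10.0], [-1.0, 2.0]])  # expected rewards PR[s, a]

f = matrix(np.ones((S, 1)))                # objective: minimise sum(V)
M = np.zeros((A * S, S))
for aa in range(A):                        # one S x S block per action
    pos = (aa + 1) * S
    M[(pos - S):pos, :] = discount * P[aa] - np.eye(S)
M = matrix(M)
h = matrix(PR.reshape(S * A, 1, order="F"))  # column-stacked rewards

solvers.options['show_progress'] = False
# solver='glpk' (as in the patch) needs cvxopt's GLPK bindings; the
# default solver is used here and agrees with GLPK to roughly 1e-8.
sol = solvers.lp(f, M, -h)
V = np.array(sol['x']).ravel()
print(V)  # should be close to the values asserted in the new test
```

Passing `solver='glpk'` requires the GLPK bindings for cvxopt; as the patch comment notes, the default solver agrees with the Octave/GLPK result only to about 1e-8.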
**Test module**

```diff
@@ -5,8 +5,8 @@
 Created on Sun May 27 23:16:57 2012
 
 @author: -
 """
-from mdp import check, checkSquareStochastic, exampleForest, exampleRand, MDP
-from mdp import PolicyIteration, QLearning, RelativeValueIteration
+from mdp import check, checkSquareStochastic, exampleForest, exampleRand, LP
+from mdp import MDP, PolicyIteration, QLearning, RelativeValueIteration
 from mdp import ValueIteration, ValueIterationGS
 from numpy import absolute, array, eye, matrix, zeros
@@ -212,6 +212,16 @@
 def test_MDP_P_R_3():
     a = MDP(P, R, 0.9, 0.01, 1)
     assert (absolute(a.R - PR) < SMALLNUM).all()
 
+# LP
+
+def test_LP():
+    a = LP(P, R, 0.9)
+    v = matrix('42.4418604651163 36.0465116279070')
+    p = matrix('1 0')
+    a.iterate()
+    assert (array(a.policy) == p).all()
+    assert (absolute(array(a.V) - v) < SMALLNUM).all()
+
 # PolicyIteration
 
 def test_PolicyIteration_init_policy0():
```
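For reference, here is a sketch of how the new `test_LP` drives the class. `P` and `R` are module-level fixtures in the test suite that are not shown in this diff; the values below are assumptions chosen to be consistent with the expected `V`, and are marked as such in the comments.

```python
# Sketch of driving the fixed LP class, mirroring the new test_LP.
# P and R are stand-ins for the test module's fixtures (assumed values).
from numpy import array
from mdp import LP

P = array([[[0.5, 0.5], [0.8, 0.2]],
           [[0.0, 1.0], [0.1, 0.9]]])  # P[a, s, s'], assumed fixture
R = array([[5, 10], [-1, 2]])          # R[s, a], assumed fixture

a = LP(P, R, 0.9)
a.iterate()       # solve the LP, then one Bellman backup for the policy
print(a.V)        # tuple of state values, e.g. (42.44..., 36.04...)
print(a.policy)   # tuple of greedy actions, e.g. (1, 0)
```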