Commit 6909d0bf by Steven Cordwell

### result of 2to3 conversion

parent 64fd9d8b
 ... ... @@ -78,7 +78,7 @@ http://www.inra.fr/mia/T/MDPtoolbox/. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. import mdp from . import mdp if __name__ == "__main__": import doctest ... ...
 ... ... @@ -146,11 +146,11 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): # | p 0 0....0 1-p | | 1 0..........0 | if is_sparse: P = [] rows = range(S) * 2 cols = [0] * S + range(1, S) + [S - 1] rows = list(range(S)) * 2 cols = [0] * S + list(range(1, S)) + [S - 1] vals = [p] * S + [1-p] * S P.append(coo_matrix((vals, (rows, cols)), shape=(S,S)).tocsr()) rows = range(S) rows = list(range(S)) cols = [0] * S vals = [1] * S P.append(coo_matrix((vals, (rows, cols)), shape=(S,S)).tocsr()) ... ... @@ -223,13 +223,13 @@ def rand(S, A, is_sparse=False, mask=None): P = [None] * A # definition of reward matrix (values between -1 and +1) R = [None] * A for a in xrange(A): for a in range(A): # it may be more efficient to implement this by constructing lists # of rows, columns and values then creating a coo_matrix, but this # works for now PP = dok_matrix((S, S)) RR = dok_matrix((S, S)) for s in xrange(S): for s in range(S): if mask is None: m = random(S) m[m <= 2/3.0] = 0 ... ...
 ... ... @@ -65,7 +65,7 @@ from numpy import ndarray, ones, zeros from numpy.random import randint, random from scipy.sparse import csr_matrix as sparse from utils import check, getSpan from .utils import check, getSpan class MDP(object): ... ... @@ -236,10 +236,10 @@ class MDP(object): for aa in range(self.A)]) else: self.R = tuple([multiply(P[aa], R[aa]).sum(1).reshape(self.S) for aa in xrange(self.A)]) for aa in range(self.A)]) except AttributeError: self.R = tuple([multiply(P[aa], R[aa]).sum(1).reshape(self.S) for aa in xrange(self.A)]) for aa in range(self.A)]) def run(self): # Raise error because child classes should implement this function. ... ... @@ -337,8 +337,8 @@ class FiniteHorizon(MDP): self.V[:, self.N - n - 1] = X self.policy[:, self.N - n - 1] = W if self.verbose: print("stage: %s ... policy transpose : %s") % ( self.N - n, self.policy[:, self.N - n -1].tolist()) print(("stage: %s ... policy transpose : %s") % ( self.N - n, self.policy[:, self.N - n -1].tolist())) # update time spent running self.time = time() - self.time # After this we could create a tuple of tuples for the values and ... ... @@ -643,7 +643,7 @@ class PolicyIteration(MDP): variation = absolute(policy_V - Vprev).max() if self.verbose: print(' %s %s') % (itr, variation) print((' %s %s') % (itr, variation)) # ensure |Vn - Vpolicy| < epsilon if variation < ((1 - self.discount) / self.discount) * epsilon: ... ... @@ -710,8 +710,8 @@ class PolicyIteration(MDP): n_different = (policy_next != self.policy).sum() # if verbose then continue printing a table if self.verbose: print(' %s %s') % (self.iter, n_different) print((' %s %s') % (self.iter, n_different)) # Once the policy is unchanging of the maximum number of # of iterations has been reached then stop if n_different == 0: ... ... @@ -836,7 +836,7 @@ class PolicyIterationModified(PolicyIteration): variation = getSpan(Vnext - self.V) if self.verbose: print("\t%s\t%s" % (self.iter, variation)) print(("\t%s\t%s" % (self.iter, variation))) self.V = Vnext if variation < self.thresh: ... ... @@ -1114,7 +1114,7 @@ class RelativeValueIteration(MDP): variation = getSpan(Vnext - self.V) if self.verbose: print(" %s %s" % (self.iter, variation)) print((" %s %s" % (self.iter, variation))) if variation < self.epsilon: done = True ... ... @@ -1383,7 +1383,7 @@ class ValueIteration(MDP): variation = getSpan(self.V - Vprev) if self.verbose: print("\t%s\t%s" % (self.iter, variation)) print(("\t%s\t%s" % (self.iter, variation))) if variation < self.thresh: if self.verbose: ... ... @@ -1509,7 +1509,7 @@ class ValueIterationGS(ValueIteration): variation = getSpan(self.V - Vprev) if self.verbose: print(" %s %s" % (self.iter, variation)) print((" %s %s" % (self.iter, variation))) if variation < self.thresh: done = True ... ...
 ... ... @@ -132,7 +132,7 @@ def check(P, R): try: aP = len(P) sP0, sP1 = P[0].shape for aa in xrange(1, aP): for aa in range(1, aP): sP0aa, sP1aa = P[aa].shape if (sP0aa != sP0) or (sP1aa != sP1): raise InvalidMDPError(mdperr["obj_square"]) ... ... @@ -172,7 +172,7 @@ def check(P, R): if (sP0 != sR0) or (aP != aR): raise InvalidMDPError(mdperr["PR_incompat"]) # Check that the P's are square and stochastic for aa in xrange(aP): for aa in range(aP): checkSquareStochastic(P[aa]) #checkSquareStochastic(P[aa, :, :]) # We are at the end of the checks, so if no exceptions have been raised ... ...
 ... ... @@ -9,7 +9,7 @@ import numpy as np import mdptoolbox, mdptoolbox.example from utils import SMALLNUM, P_small, R_small from .utils import SMALLNUM, P_small, R_small def test_MDP_P_R_1(): P1 = [] ... ...
 ... ... @@ -9,8 +9,8 @@ import numpy as np import mdptoolbox from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse from utils import P_forest_sparse, R_forest_sparse from .utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse from .utils import P_forest_sparse, R_forest_sparse def test_PolicyIteration_init_policy0(): sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9) ... ...
 ... ... @@ -9,8 +9,8 @@ import numpy as np import mdptoolbox from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse from utils import R_forest_sparse, P_small, R_small, P_sparse from .utils import SMALLNUM, P_forest, R_forest, P_forest_sparse from .utils import R_forest_sparse, P_small, R_small, P_sparse def test_QLearning_small(): np.random.seed(0) ... ...
 ... ... @@ -9,8 +9,8 @@ import numpy as np import mdptoolbox from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse from utils import R_forest_sparse, P_small, R_small, P_sparse from .utils import SMALLNUM, P_forest, R_forest, P_forest_sparse from .utils import R_forest_sparse, P_small, R_small, P_sparse def test_RelativeValueIteration_small(): sdp = mdptoolbox.mdp.RelativeValueIteration(P_small, R_small) ... ...
 ... ... @@ -9,9 +9,9 @@ import numpy as np import mdptoolbox from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse from utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse from utils import P_small, R_small, P_sparse from .utils import SMALLNUM, P_forest, R_forest, P_forest_sparse from .utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse from .utils import P_small, R_small, P_sparse def test_ValueIteration_small(): sdp = mdptoolbox.mdp.ValueIteration(P_small, R_small, 0.9) ... ...
 ... ... @@ -9,8 +9,8 @@ import numpy as np import mdptoolbox from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse from utils import P_forest_sparse, R_forest_sparse from .utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse from .utils import P_forest_sparse, R_forest_sparse def test_ValueIterationGS_small(): sdp = mdptoolbox.mdp.ValueIterationGS(P_small, R_small, 0.9) ... ...
 ... ... @@ -9,8 +9,8 @@ import numpy as np import mdptoolbox.example from utils import ACTIONS, STATES, P_forest, R_forest, P_rand, R_rand from utils import P_rand_sparse, R_rand_sparse from .utils import ACTIONS, STATES, P_forest, R_forest, P_rand, R_rand from .utils import P_rand_sparse, R_rand_sparse def test_exampleForest_P_shape(): assert (P_forest == np.array([[[0.1, 0.9, 0.0], ... ...
 ... ... @@ -10,7 +10,7 @@ import scipy as sp import mdptoolbox from utils import ACTIONS, STATES from .utils import ACTIONS, STATES def test_check_square_stochastic_nonnegative_array_1(): P = np.zeros((ACTIONS, STATES, STATES)) ... ... @@ -55,21 +55,21 @@ def test_check_P_square_stochastic_nonnegative_object_sparse(): def test_check_P_square_stochastic_nonnegative_list_array(): P = [] R = np.random.rand(STATES, ACTIONS) for a in xrange(ACTIONS): for a in range(ACTIONS): P.append(np.eye(STATES)) assert (mdptoolbox.utils.check(P, R) == None) def test_check_P_square_stochastic_nonnegative_list_matrix(): P = [] R = np.random.rand(STATES, ACTIONS) for a in xrange(ACTIONS): for a in range(ACTIONS): P.append(np.matrix(np.eye(STATES))) assert (mdptoolbox.utils.check(P, R) == None) def test_check_P_square_stochastic_nonnegative_list_sparse(): P = [] R = np.random.rand(STATES, ACTIONS) for a in xrange(ACTIONS): for a in range(ACTIONS): P.append(sp.sparse.eye(STATES, STATES).tocsr()) assert (mdptoolbox.utils.check(P, R) == None) ... ... @@ -78,21 +78,21 @@ def test_check_P_square_stochastic_nonnegative_list_sparse(): def test_check_P_square_stochastic_nonnegative_dict_array(): P = {} R = np.random.rand(STATES, ACTIONS) for a in xrange(ACTIONS): for a in range(ACTIONS): P[a] = np.eye(STATES) assert (mdptoolbox.utils.check(P, R) == None) def test_check_P_square_stochastic_nonnegative_dict_matrix(): P = {} R = np.random.rand(STATES, ACTIONS) for a in xrange(ACTIONS): for a in range(ACTIONS): P[a] = np.matrix(np.eye(STATES)) assert (mdptoolbox.utils.check(P, R) == None) def test_check_P_square_stochastic_nonnegative_dict_sparse(): P = {} R = np.random.rand(STATES, ACTIONS) for a in xrange(ACTIONS): for a in range(ACTIONS): P[a] = sp.sparse.eye(STATES, STATES).tocsr() assert (mdptoolbox.utils.check(P, R) == None) ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!