Commit c8bbc99c by Steven Cordwell

### Remove use of the random module and replace it with numpy.random

parent eb40e9c6
 ... ... @@ -33,4 +33,4 @@ setup(name="pymdptoolbox", packages=["mdptoolbox"], package_dir={"": "src"}, requires=["math", "numpy", "random", "scipy", "time"],) requires=["math", "numpy", "scipy", "time"],)
 ... ... @@ -5,13 +5,11 @@ Created on Sun Aug 18 14:32:25 2013 @author: steve """ from random import random from numpy import diag, ones, where, zeros from numpy.random import rand, randint from numpy.random import randint, random from scipy.sparse import coo_matrix, dok_matrix def exampleForest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): """Generate a MDP example based on a simple forest management scenario. This function is used to generate a transition probability ... ... @@ -144,7 +142,7 @@ def exampleForest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): # we want to return the generated transition and reward matrices return (P, R) def exampleRand(S, A, is_sparse=False, mask=None): def rand(S, A, is_sparse=False, mask=None): """Generate a random Markov Decision Process. Parameters ... ... @@ -198,7 +196,7 @@ def exampleRand(S, A, is_sparse=False, mask=None): RR = dok_matrix((S, S)) for s in xrange(S): if mask is None: m = rand(S) m = random(S) m[m <= 2/3.0] = 0 m[m > 2/3.0] = 1 elif mask.shape == (A, S, S): ... ... @@ -210,9 +208,9 @@ def exampleRand(S, A, is_sparse=False, mask=None): m[randint(0, S)] = 1 n = 1 cols = where(m)[0] # m[s, :] vals = rand(n) vals = random(n) vals = vals / vals.sum() reward = 2*rand(n) - ones(n) reward = 2*random(n) - ones(n) PP[s, cols] = vals RR[s, cols] = reward # PP.tocsr() takes the same amount of time as PP.tocoo().tocsr() ... ... @@ -229,7 +227,7 @@ def exampleRand(S, A, is_sparse=False, mask=None): for s in range(S): # create our own random mask if there is no user supplied one if mask is None: m = rand(S) m = random(S) r = random() m[m <= r] = 0 m[m > r] = 1 ... ... 
@@ -241,8 +239,8 @@ def exampleRand(S, A, is_sparse=False, mask=None): if m.sum() == 0: m[randint(0, S)] = 1 n = 1 P[a][s] = m * rand(S) P[a][s] = m * random(S) P[a][s] = P[a][s] / P[a][s].sum() R[a][s] = (m * (2*rand(S) - ones(S, dtype=int))) R[a][s] = (m * (2*random(S) - ones(S, dtype=int))) # we want to return the generated transition and reward matrices return (P, R)
 ... ... @@ -95,12 +95,11 @@ http://www.inra.fr/mia/T/MDPtoolbox/. # POSSIBILITY OF SUCH DAMAGE. from math import ceil, log, sqrt from random import random from time import time from numpy import absolute, array, empty, mean, mod, multiply from numpy import ndarray, ones, zeros from numpy.random import randint from numpy.random import randint, random from scipy.sparse import csr_matrix as sparse from utils import check, getSpan ... ... @@ -991,10 +990,8 @@ class QLearning(MDP): >>> # These examples are reproducible only if random seed is set to 0 in >>> # both the random and numpy.random modules. >>> import numpy as np >>> import random >>> import mdptoolbox, mdptoolbox.example >>> np.random.seed(0) >>> random.seed(0) >>> P, R = mdptoolbox.example.forest() >>> ql = mdptoolbox.mdp.QLearning(P, R, 0.96) >>> ql.Q ... ... @@ -1007,12 +1004,10 @@ class QLearning(MDP): (0, 0, 0) >>> import mdptoolbox >>> import random >>> import numpy as np >>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]]) >>> R = np.array([[5, 10], [-1, 2]]) >>> np.random.seed(0) >>> random.seed(0) >>> pim = mdptoolbox.mdp.QLearning(P, R, 0.9) >>> ql.Q array([[ 39.933691 , 43.17543338], ... ...
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!