Commit b7858639 authored by Steven Cordwell

Make cosmetic changes to improve style of code

parent 9a04a050
......@@ -164,11 +164,11 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
rows = list(range(S)) * 2
cols = [0] * S + list(range(1, S)) + [S - 1]
vals = [p] * S + [1-p] * S
P.append(_sp.coo_matrix((vals, (rows, cols)), shape=(S,S)).tocsr())
P.append(_sp.coo_matrix((vals, (rows, cols)), shape=(S, S)).tocsr())
rows = list(range(S))
cols = [0] * S
vals = [1] * S
P.append(_sp.coo_matrix((vals, (rows, cols)), shape=(S,S)).tocsr())
P.append(_sp.coo_matrix((vals, (rows, cols)), shape=(S, S)).tocsr())
else:
P = _np.zeros((2, S, S))
P[0, :, :] = (1 - p) * _np.diag(_np.ones(S - 1), 1)
......@@ -182,7 +182,6 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
R[:, 1] = _np.ones(S)
R[0, 1] = 0
R[S - 1, 1] = r2
# we want to return the generated transition and reward matrices
return(P, R)
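For reference, a short usage sketch of the generator above; the dense shapes follow from the construction shown, and the sparse branch appends one CSR matrix per action instead:

import mdptoolbox.example

P, R = mdptoolbox.example.forest(S=3, r1=4, r2=2, p=0.1)
# Dense case: P has shape (2, 3, 3), one SxS transition matrix per action,
# and R has shape (3, 2), one column of state rewards per action.
P_sp, R_sp = mdptoolbox.example.forest(S=3, is_sparse=True)
# Sparse case: each P_sp[a] is an SxS scipy CSR matrix built from the
# coo_matrix calls above.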
def rand(S, A, is_sparse=False, mask=None):
......@@ -338,12 +337,11 @@ def rand(S, A, is_sparse=False, mask=None):
P[a][s] = P[a][s] / P[a][s].sum()
R[a][s] = (m * (2 * _np.random.random(S) -
_np.ones(S, dtype=int)))
# we want to return the generated transition and reward matrices
return(P, R)
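A usage sketch for the random generator above; the row normalisation shown guarantees that every transition row sums to one:

import mdptoolbox.example

P, R = mdptoolbox.example.rand(10, 3)   # 10 states, 3 actions, dense by default
for a in range(3):
    for s in range(10):
        assert abs(P[a][s].sum() - 1) < 1e-10   # rows are stochastic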
def small():
"""A very small Markov decision process.
The probability transition matrices are::
| | 0.5 0.5 | |
......@@ -356,7 +354,7 @@ def small():
R = | 5 10 |
| -1 2 |
Returns
=======
out : tuple
......@@ -378,6 +376,6 @@ def small():
[-1, 2]])
"""
P = _np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
P = _np.array([[[0.5, 0.5], [0.8, 0.2]], [[0, 1], [0.1, 0.9]]])
R = _np.array([[5, 10], [-1, 2]])
return(P, R)
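An end-to-end sketch using the two-state example above; the solver call mirrors the class signatures that appear later in this diff, and the discount value is chosen arbitrarily:

import mdptoolbox, mdptoolbox.example

P, R = mdptoolbox.example.small()
vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9)
vi.run()
print(vi.policy)   # the computed policy
print(vi.V)        # the corresponding value function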
......@@ -246,7 +246,7 @@ class MDP(object):
if P.ndim == 3:
self.S = P.shape[1]
else:
self.S = P[0].shape[0]
self.S = P[0].shape[0]
except AttributeError:
self.S = P[0].shape[0]
# convert P to a tuple of numpy arrays
......@@ -281,14 +281,14 @@ class MDP(object):
self.R = tuple(r for aa in range(self.A))
elif R.ndim == 2:
self.R = tuple(_np.array(R[:, aa]).reshape(self.S)
for aa in range(self.A))
for aa in range(self.A))
else:
self.R = tuple(_np.multiply(P[aa], R[aa]).sum(1).reshape(self.S)
for aa in range(self.A))
for aa in range(self.A))
except AttributeError:
if len(R) == self.A:
self.R = tuple(_np.multiply(P[aa], R[aa]).sum(1).reshape(self.S)
for aa in range(self.A))
for aa in range(self.A))
else:
r = _np.array(R).reshape(self.S)
self.R = tuple(r for aa in range(self.A))
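The try/except block above normalises whatever shape the reward argument arrives in, so that self.R always ends up as a tuple of A vectors of length S. A condensed, dense-only sketch of that logic (the helper name is illustrative, not the class's own):

import numpy as np

def normalise_reward(P, R, A, S):
    R = np.asarray(R)
    if R.ndim == 1:
        # one reward per state, shared by every action
        r = R.reshape(S)
        return tuple(r for _ in range(A))
    if R.ndim == 2:
        # shape (S, A): one column per action
        return tuple(np.array(R[:, a]).reshape(S) for a in range(A))
    # shape (A, S, S): per-transition rewards reduced to their expectation under P
    return tuple(np.multiply(P[a], R[a]).sum(1).reshape(S) for a in range(A))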
......@@ -375,8 +375,6 @@ class FiniteHorizon(MDP):
# Set the reward for the final transition to h, if specified.
if h is not None:
self.V[:, N] = h
# Call the iteration method
#self.run()
def run(self):
# Run the finite horizon algorithm.
......@@ -459,8 +457,6 @@ class LP(MDP):
# this doesn't do what I want it to do c.f. issue #3
if not self.verbose:
solvers.options['show_progress'] = False
# Call the iteration method
#self.run()
def run(self):
# Run the linear programming algorithm.
......@@ -488,7 +484,7 @@ class LP(MDP):
# only to 10e-8 places. This assumes glpk is installed of course.
self.V = _np.array(self._linprog(f, M, -h)['x']).reshape(self.S)
# apply the Bellman operator
self.policy, self.V = self._bellmanOperator()
self.policy, self.V = self._bellmanOperator()
# update the time spent solving
self.time = _time.time() - self.time
# store value and policy as tuples
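The LP being solved here is the standard one for discounted MDPs: minimise the sum of V subject to V >= R_a + discount * P_a V for every action a. A self-contained sketch of that formulation, using scipy.optimize.linprog purely for illustration (the class itself goes through cvxopt/glpk, and the shapes below are assumptions):

import numpy as np
from scipy.optimize import linprog

def solve_mdp_lp(P, R, discount):
    # P: (A, S, S) dense transitions, R: (S, A) dense rewards -- assumed shapes.
    A, S, _ = P.shape
    c = np.ones(S)                                   # minimise 1'V
    G = np.vstack([discount * P[a] - np.eye(S) for a in range(A)])
    h = np.concatenate([-R[:, a] for a in range(A)])
    # (discount * P_a - I) V <= -R_a   <=>   V >= R_a + discount * P_a V
    res = linprog(c, A_ub=G, b_ub=h, bounds=(None, None))
    return res.x

A single Bellman backup over the resulting V then yields the greedy policy, which is what the _bellmanOperator() call above does.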
......@@ -560,7 +556,7 @@ class PolicyIteration(MDP):
# Set up the MDP, but don't need to worry about epsilon values
MDP.__init__(self, transitions, reward, discount, None, max_iter)
# Check if the user has supplied an initial policy. If not make one.
if policy0 == None:
if policy0 is None:
# Initialise the policy to the one which maximises the expected
# immediate reward
null = _np.zeros(self.S)
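When no policy0 is given, the initial policy is the greedy policy with respect to the immediate reward alone, i.e. a Bellman backup against a zero value function. A standalone sketch of that idea (names are illustrative, not the class's own):

import numpy as np

def initial_policy(R, S, A):
    # R is assumed to be the tuple of A reward vectors built in MDP.__init__.
    q = np.stack([R[a] for a in range(A)], axis=1)   # (S, A) immediate rewards
    return q.argmax(axis=1)                          # best action per state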
......@@ -592,8 +588,6 @@ class PolicyIteration(MDP):
raise ValueError("'eval_type' should be '0' for matrix evaluation "
"or '1' for iterative evaluation. The strings "
"'matrix' and 'iterative' can also be used.")
# Call the iteration method
#self.run()
def _computePpolicyPRpolicy(self):
# Compute the transition matrix and the reward matrix for a policy.
......@@ -768,7 +762,7 @@ class PolicyIteration(MDP):
done = True
if self.verbose:
print(_MSG_STOP_UNCHANGING_POLICY)
elif (self.iter == self.max_iter):
elif self.iter == self.max_iter:
done = True
if self.verbose:
print(_MSG_STOP_MAX_ITER)
......@@ -857,9 +851,6 @@ class PolicyIterationModified(PolicyIteration):
Rmin = min(R.min() for R in self.R)
self.V = 1 / (1 - self.discount) * Rmin * _np.ones((self.S,))
# Call the iteration method
#self.run()
def run(self):
# Run the modified policy iteration algorithm.
......@@ -991,9 +982,6 @@ class QLearning(MDP):
self.Q = _np.zeros((self.S, self.A))
self.mean_discrepancy = []
# Call the iteration method
#self.run()
def run(self):
# Run the Q-learning algorithm.
discrepancy = []
......@@ -1006,13 +994,13 @@ class QLearning(MDP):
for n in range(1, self.max_iter + 1):
# Reinitialisation of trajectories every 100 transitions
if ((n % 100) == 0):
if (n % 100) == 0:
s = _np.random.randint(0, self.S)
# Action choice : greedy with increasing probability
# probability 1-(1/log(n+2)) can be changed
pn = _np.random.random()
if (pn < (1 - (1 / _math.log(n + 2)))):
if pn < (1 - (1 / _math.log(n + 2))):
# optimal_action = self.Q[s, :].max()
a = self.Q[s, :].argmax()
else:
......@@ -1022,7 +1010,7 @@ class QLearning(MDP):
p_s_new = _np.random.random()
p = 0
s_new = -1
while ((p < p_s_new) and (s_new < (self.S - 1))):
while (p < p_s_new) and (s_new < (self.S - 1)):
s_new = s_new + 1
p = p + self.P[a][s, s_new]
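The while loop above is inverse-CDF sampling: draw a uniform number and walk the cumulative transition probabilities of the current row until it is exceeded. The same idea in isolation, assuming a dense probability row:

import numpy as np

def sample_next_state(p_row):
    u = np.random.random()
    cumulative = 0.0
    for s_new, prob in enumerate(p_row):
        cumulative += prob
        if cumulative > u:
            return s_new
    return len(p_row) - 1   # fall back to the last state on rounding error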
......@@ -1139,9 +1127,6 @@ class RelativeValueIteration(MDP):
self.average_reward = None
# Call the iteration method
#self.run()
def run(self):
# Run the relative value iteration algorithm.
......@@ -1153,7 +1138,7 @@ class RelativeValueIteration(MDP):
while not done:
self.iter += 1;
self.iter += 1
self.policy, Vnext = self._bellmanOperator()
Vnext = Vnext - self.gain
......@@ -1164,15 +1149,15 @@ class RelativeValueIteration(MDP):
print((" %s\t\t %s" % (self.iter, variation)))
if variation < self.epsilon:
done = True
self.average_reward = self.gain + (Vnext - self.V).min()
if self.verbose:
print(_MSG_STOP_EPSILON_OPTIMAL_POLICY)
done = True
self.average_reward = self.gain + (Vnext - self.V).min()
if self.verbose:
print(_MSG_STOP_EPSILON_OPTIMAL_POLICY)
elif self.iter == self.max_iter:
done = True
self.average_reward = self.gain + (Vnext - self.V).min()
if self.verbose:
print(_MSG_STOP_MAX_ITER)
done = True
self.average_reward = self.gain + (Vnext - self.V).min()
if self.verbose:
print(_MSG_STOP_MAX_ITER)
self.V = Vnext
self.gain = float(self.V[self.S - 1])
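For context, a condensed sketch of one sweep of the loop above (illustrative only): the gain is pinned to the value of the last state, the Bellman update is shifted by it, and the span of the change in V is the quantity compared against epsilon:

import numpy as np

def rvi_sweep(P, R, V, gain, discount, A):
    # One Bellman backup, shifted by the current gain estimate.
    Q = np.stack([R[a] + discount * P[a].dot(V) for a in range(A)], axis=1)
    policy = Q.argmax(axis=1)
    V_next = Q.max(axis=1) - gain
    diff = V_next - V
    variation = diff.max() - diff.min()       # the span compared against epsilon
    return policy, V_next, variation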
......@@ -1320,9 +1305,6 @@ class ValueIteration(MDP):
# threshold of variation for V for an epsilon-optimal policy
self.thresh = epsilon
# Call the iteration method
#self.run()
def _boundIter(self, epsilon):
# Compute a bound for the number of iterations.
#
......@@ -1395,7 +1377,7 @@ class ValueIteration(MDP):
if self.verbose:
print(_MSG_STOP_EPSILON_OPTIMAL_POLICY)
break
elif (self.iter == self.max_iter):
elif self.iter == self.max_iter:
if self.verbose:
print(_MSG_STOP_MAX_ITER)
break
......@@ -1491,9 +1473,6 @@ class ValueIterationGS(ValueIteration):
# threshold of variation for V for an epsilon-optimal policy
self.thresh = epsilon
# Call the iteration method
#self.run()
def run(self):
# Run the value iteration Gauss-Seidel algorithm.
......@@ -1534,7 +1513,7 @@ class ValueIterationGS(ValueIteration):
for s in range(self.S):
Q = _np.zeros(self.A)
for a in range(self.A):
Q[a] = self.R[a][s] + self.discount * self.P[a][s,:].dot(self.V)
Q[a] = self.R[a][s] + self.discount * self.P[a][s, :].dot(self.V)
self.V[s] = Q.max()
self.policy.append(int(Q.argmax()))
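The state loop above is a Gauss-Seidel sweep: V[s] is overwritten as soon as it is recomputed, so states later in the same sweep already see the new values. The same sweep in isolation, assuming a dense P of shape (A, S, S):

import numpy as np

def gauss_seidel_sweep(P, R, V, discount):
    A, S, _ = P.shape
    policy = []
    for s in range(S):
        Q = np.array([R[a][s] + discount * P[a][s, :].dot(V) for a in range(A)])
        V[s] = Q.max()                 # in-place update used by later states
        policy.append(int(Q.argmax()))
    return policy, V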
......
......@@ -19,12 +19,12 @@ getSpan
# Copyright (c) 2011-2013 Steven A. W. Cordwell
# Copyright (c) 2009 INRA
#
#
# All rights reserved.
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
......@@ -33,7 +33,7 @@ getSpan
# * Neither the name of the <ORGANIZATION> nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
......@@ -49,7 +49,7 @@ getSpan
import numpy as _np
# These need to be fixed so that we use classes derived from Error.
mdperr = {
MDPERR = {
"mat_nonneg" :
"Transition probabilities must be non-negative.",
"mat_square" :
......@@ -84,9 +84,9 @@ mdperr = {
def check(P, R):
"""Check if ``P`` and ``R`` define a valid Markov Decision Process (MDP).
Let ``S`` = number of states, ``A`` = number of actions.
Parameters
----------
P : array
......@@ -99,18 +99,18 @@ def check(P, R):
shape of (S, A, A). It can also be a one dimensional array with a
shape of (A, ), where each element contains matrix with a shape of
(S, S) which can possibly be sparse. It can also be an array with
a shape of (S, A) which can possibly be sparse.
a shape of (S, A) which can possibly be sparse.
Notes
-----
Raises an error if ``P`` and ``R`` do not define an MDP.
Examples
--------
>>> import mdptoolbox, mdptoolbox.example
>>> P_valid, R_valid = mdptoolbox.example.rand(100, 5)
>>> mdptoolbox.util.check(P_valid, R_valid) # Nothing should happen
>>>
>>>
>>> import numpy as np
>>> P_invalid = np.random.rand(5, 100, 100)
>>> mdptoolbox.util.check(P_invalid, R_valid) # Raises an exception
......@@ -128,7 +128,7 @@ def check(P, R):
# continue checking from there
raise AttributeError
else:
raise InvalidMDPError(mdperr["P_shape"])
raise InvalidMDPError(MDPERR["P_shape"])
except AttributeError:
try:
aP = len(P)
......@@ -136,9 +136,9 @@ def check(P, R):
for aa in range(1, aP):
sP0aa, sP1aa = P[aa].shape
if (sP0aa != sP0) or (sP1aa != sP1):
raise InvalidMDPError(mdperr["obj_square"])
raise InvalidMDPError(MDPERR["obj_square"])
except AttributeError:
raise InvalidMDPError(mdperr["P_shape"])
raise InvalidMDPError(MDPERR["P_shape"])
# Checking R
try:
ndimR = R.ndim
......@@ -151,7 +151,7 @@ def check(P, R):
elif ndimR == 3:
aR, sR0, sR1 = R.shape
else:
raise InvalidMDPError(mdperr["R_shape"])
raise InvalidMDPError(MDPERR["R_shape"])
except AttributeError:
try:
lenR = len(R)
......@@ -160,15 +160,15 @@ def check(P, R):
sR0, sR1 = R[0].shape
for aa in range(1, aR):
sR0aa, sR1aa = R[aa].shape
if ((sR0aa != sR0) or (sR1aa != sR1)):
raise InvalidMDPError(mdperr["obj_square"])
if (sR0aa != sR0) or (sR1aa != sR1):
raise InvalidMDPError(MDPERR["obj_square"])
elif lenR == sP0:
aR = aP
sR0 = sR1 = lenR
else:
raise InvalidMDPError(mdperr["R_shape"])
raise InvalidMDPError(MDPERR["R_shape"])
except AttributeError:
raise InvalidMDPError(mdperr["R_shape"])
raise InvalidMDPError(MDPERR["R_shape"])
# Checking dimensions
assert sP0 > 0, "The number of states in P must be greater than 0."
assert aP > 0, "The number of actions in P must be greater than 0."
......@@ -183,13 +183,12 @@ def check(P, R):
checkSquareStochastic(P[aa])
# We are at the end of the checks, so if no exceptions have been raised
# then that means there are (hopefully) no errors and we return None
return None
# These are the old code comments, which need to be converted to
# information in the docstring:
#
# transitions must be a numpy array either an AxSxS ndarray (with any
# dtype other than "object"); or, a 1xA ndarray with an "object" dtype,
# transitions must be a numpy array either an AxSxS ndarray (with any
# dtype other than "object"); or, a 1xA ndarray with an "object" dtype,
# and each element containing an SxS array. An AxSxS array will be
# be converted to an object array. A numpy object array is similar to a
# MATLAB cell array.
......@@ -208,7 +207,7 @@ def check(P, R):
# As above but for the reward array. A difference is that the reward
# array can have either two or 3 dimensions.
#
# We want to make sure that the transition probability array and the
# We want to make sure that the transition probability array and the
# reward array are in agreement. This means that both should show that
# there are the same number of actions and the same number of states.
# Furthermore the probability of transition matrices must be SxS in
......@@ -238,7 +237,7 @@ def check(P, R):
# telling the user what needs to be fixed.
#
# if we are using a normal array for this, then the first
# dimension should be the number of actions, and the second and
# dimension should be the number of actions, and the second and
# third should be the number of states
#
# the first dimension of the transition matrix must report the same
......@@ -253,14 +252,14 @@ def check(P, R):
# normal arrays this is a matrix formed by taking a slice of the array
#
# if the reward array has an object dtype, then we check that
# each element contains a matrix of the same shape as we did
# each element contains a matrix of the same shape as we did
# above with the transition array.
#
# This indicates that the reward matrices are constructed per
# This indicates that the reward matrices are constructed per
# transition, so that the first dimension is the actions and
# the second two dimensions are the states.
#
# then the reward matrix is per state, so the first dimension is
# then the reward matrix is per state, so the first dimension is
# the states and the second dimension is the actions.
#
# this is added just so that the next check doesn't error out
......@@ -279,19 +278,19 @@ def rowsSumToOne(Z, n):
def checkSquareStochastic(Z):
"""Check if Z is a square stochastic matrix.
Let S = number of states.
Parameters
----------
Z : matrix
This should be a two dimensional array with a shape of (S, S). It can
possibly be sparse.
Notes
Notes
----------
Returns None if no error has been detected, else it raises an error.
"""
# try to get the shape of the matrix
try:
......@@ -299,42 +298,40 @@ def checkSquareStochastic(Z):
except AttributeError:
raise TypeError("Matrix should be a numpy type.")
except ValueError:
raise InvalidMDPError(mdperr["mat_square"])
raise InvalidMDPError(MDPERR["mat_square"])
# check that the matrix is square, and that each row sums to one
assert s1 == s2, mdperr["mat_square"]
assert rowsSumToOne(Z, s2), mdperr["mat_stoch"]
assert s1 == s2, MDPERR["mat_square"]
assert rowsSumToOne(Z, s2), MDPERR["mat_stoch"]
# make sure that there are no values less than zero
try:
assert (Z >= 0).all(), mdperr["mat_nonneg"]
assert (Z >= 0).all(), MDPERR["mat_nonneg"]
except (NotImplementedError, AttributeError, TypeError):
try:
assert (Z.data >= 0).all(), mdperr["mat_nonneg"]
assert (Z.data >= 0).all(), MDPERR["mat_nonneg"]
except AttributeError:
raise TypeError("Matrix should be a numpy type.")
return(None)
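A small usage sketch for the checker above (import path as in this module's docstrings; values are illustrative and chosen to be exactly representable):

import numpy as np
from mdptoolbox.util import checkSquareStochastic

Z = np.array([[0.5, 0.5],
              [0.25, 0.75]])
checkSquareStochastic(Z)   # passes silently: square, rows sum to one, non-negative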
def getSpan(W):
"""Return the span of W
sp(W) = max W(s) - min W(s)
"""
return (W.max() - W.min())
return(W.max() - W.min())
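In other words, the span is simply the gap between the largest and smallest entry, which the iteration classes above compare against their epsilon thresholds:

import numpy as np
from mdptoolbox.util import getSpan

getSpan(np.array([1.0, 4.0, 2.5]))   # 3.0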
class Error(Exception):
"""Base class for exceptions in this module."""
def __init__(self):
Exception.__init__(self)
self.message = "PyMDPToolbox: "
def __str__(self):
return repr(self.message)
class InvalidMDPError(Error):
"""Class for invalid definitions of a MDP."""
def __init__(self, msg):
Error.__init__(self)
self.message += msg
......