# -*- coding: utf-8 -*- """Markov Decision Process (MDP) Toolbox: ``utils`` module ======================================================= The ``utils`` module provides functions to check that an MDP is validly described. There are also functions for working with MDPs while they are being solved. Available functions ------------------- check Check that an MDP is properly defined checkSquareStochastic Check that a matrix is square and stochastic getSpan Calculate the span of an array """ # Copyright (c) 2011-2013 Steven A. W. Cordwell # Copyright (c) 2009 INRA # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # * Neither the name of the nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. from numpy import absolute, ones SMALLNUM = 10e-12 # These need to be fixed so that we use classes derived from Error. mdperr = { "mat_nonneg" : "Transition probabilities must be non-negative.", "mat_square" : "A transition probability matrix must be square, with dimensions S×S.", "mat_stoch" : "Each row of a transition probability matrix must sum to one (1).", "obj_shape" : "Object arrays for transition probabilities and rewards " "must have only 1 dimension: the number of actions A. Each element of " "the object array contains an SxS ndarray or matrix.", "obj_square" : "Each element of an object array for transition " "probabilities and rewards must contain an SxS ndarray or matrix; i.e. " "P[a].shape = (S, S) or R[a].shape = (S, S).", "P_type" : "The transition probabilities must be in a numpy array; " "i.e. type(P) is ndarray.", "P_shape" : "The transition probability array must have the shape " "(A, S, S) with S : number of states greater than 0 and A : number of " "actions greater than 0. i.e. R.shape = (A, S, S)", "PR_incompat" : "Incompatibility between P and R dimensions.", "R_type" : "The rewards must be in a numpy array; i.e. type(R) is " "ndarray, or numpy matrix; i.e. type(R) is matrix.", "R_shape" : "The reward matrix R must be an array of shape (A, S, S) or " "(S, A) with S : number of states greater than 0 and A : number of " "actions greater than 0. i.e. R.shape = (S, A) or (A, S, S)." } def check(P, R): """Check if ``P`` and ``R`` define a valid Markov Decision Process (MDP). Let ``S`` = number of states, ``A`` = number of actions. Parameters --------- P : array The transition matrices. It can be a three dimensional array with a shape of (A, S, S). It can also be a one dimensional arraye with a shape of (A, ), where each element contains a matrix of shape (S, S) which can possibly be sparse. R : array The reward matrix. It can be a three dimensional array with a shape of (S, A, A). It can also be a one dimensional array with a shape of (A, ), where each element contains matrix with a shape of (S, S) which can possibly be sparse. It can also be an array with a shape of (S, A) which can possibly be sparse. Notes ----- Raises an error if ``P`` and ``R`` do not define a MDP. Examples -------- >>> import mdptoolbox, mdptoolbox.example >>> P_valid, R_valid = mdptoolbox.example.rand(100, 5) >>> mdptoolbox.utils.check(P_valid, R_valid) # Nothing should happen >>> >>> import numpy as np >>> P_invalid = np.random.rand(5, 100, 100) >>> mdptoolbox.utils.check(P_invalid, R_valid) # Raises an exception """ # Checking P try: if P.ndim == 3: aP, sP0, sP1 = P.shape elif P.ndim == 1: # A hack so that we can go into the next try-except statement and # continue checking from there raise AttributeError else: raise InvalidMDPError(mdperr["P_shape"]) except AttributeError: try: aP = len(P) sP0, sP1 = P[0].shape for aa in range(1, aP): sP0aa, sP1aa = P[aa].shape if (sP0aa != sP0) or (sP1aa != sP1): raise InvalidMDPError(mdperr["obj_square"]) except AttributeError: raise InvalidMDPError(mdperr["P_shape"]) # Checking R try: ndimR = R.ndim if ndimR == 1: # A hack so that we can go into the next try-except statement raise AttributeError elif ndimR == 2: sR0, aR = R.shape sR1 = sR0 elif ndimR == 3: aR, sR0, sR1 = R.shape else: raise InvalidMDPError(mdperr["R_shape"]) except AttributeError: try: lenR = len(R) if lenR == aP: aR = lenR sR0, sR1 = R[0].shape for aa in range(1, aR): sR0aa, sR1aa = R[aa].shape if ((sR0aa != sR0) or (sR1aa != sR1)): raise InvalidMDPError(mdperr["obj_square"]) elif lenR == sP0: aR = aP sR0 = sR1 = lenR else: raise InvalidMDPError(mdperr["R_shape"]) except AttributeError: raise InvalidMDPError(mdperr["R_shape"]) # Checking dimensions assert sP0 > 0, "The number of states in P must be greater than 0." assert aP > 0, "The number of actions in P must be greater than 0." assert sP0 == sP1, "The matrix P must be square with respect to states." assert sR0 > 0, "The number of states in R must be greater than 0." assert aR > 0, "The number of actions in R must be greater than 0." assert sR0 == sR1, "The matrix R must be square with respect to states." assert sP0 == sR0, "The number of states must agree in P and R." assert aP == aR, "The number of actions must agree in P and R." # Check that the P's are square and stochastic for aa in range(aP): checkSquareStochastic(P[aa]) # We are at the end of the checks, so if no exceptions have been raised # then that means there are (hopefullly) no errors and we return None return None # These are the old code comments, which need to be converted to # information in the docstring: # # tranitions must be a numpy array either an AxSxS ndarray (with any # dtype other than "object"); or, a 1xA ndarray with a "object" dtype, # and each element containing an SxS array. An AxSxS array will be # be converted to an object array. A numpy object array is similar to a # MATLAB cell array. # # NumPy has an array type of 'object', which is roughly equivalent to # the MATLAB cell array. These are most useful for storing sparse # matrices as these can only have two dimensions whereas we want to be # able to store a transition matrix for each action. If the dytpe of # the transition probability array is object then we store this as # P_is_object = True. # If it is an object array, then it should only have one dimension # otherwise fail with a message expalining why. # If it is a normal array then the number of dimensions must be exactly # three, otherwise fail with a message explaining why. # # As above but for the reward array. A difference is that the reward # array can have either two or 3 dimensions. # # We want to make sure that the transition probability array and the # reward array are in agreement. This means that both should show that # there are the same number of actions and the same number of states. # Furthermore the probability of transition matrices must be SxS in # shape, so we check for that also. # # If the user has put their transition matrices into a numpy array # with dtype of 'object', then it is possible that they have made a # mistake and not all of the matrices are of the same shape. So, # here we record the number of actions and states that the first # matrix in element zero of the object array says it has. After # that we check that every other matrix also reports the same # number of actions and states, otherwise fail with an error. # aP: the number of actions in the transition array. This # corresponds to the number of elements in the object array. # # sP0: the number of states as reported by the number of rows of # the transition matrix # sP1: the number of states as reported by the number of columns of # the transition matrix # # Now we check to see that every element of the object array holds # a matrix of the same shape, otherwise fail. # # sp0aa and sp1aa represents the number of states in each # subsequent element of the object array. If it doesn't match # what was found in the first element, then we need to fail # telling the user what needs to be fixed. # # if we are using a normal array for this, then the first # dimension should be the number of actions, and the second and # third should be the number of states # # the first dimension of the transition matrix must report the same # number of states as the second dimension. If not then we are not # dealing with a square matrix and it is not a valid transition # probability. Also, if the number of actions is less than one, or the # number of states is less than one, then it also is not a valid # transition probability. # # now we check that each transition matrix is square-stochastic. For # object arrays this is the matrix held in each element, but for # normal arrays this is a matrix formed by taking a slice of the array # # if the rewarad array has an object dtype, then we check that # each element contains a matrix of the same shape as we did # above with the transition array. # # This indicates that the reward matrices are constructed per # transition, so that the first dimension is the actions and # the second two dimensions are the states. # # then the reward matrix is per state, so the first dimension is # the states and the second dimension is the actions. # # this is added just so that the next check doesn't error out # saying that sR1 doesn't exist # # the number of actions must be more than zero, the number of states # must also be more than 0, and the states must agree # # now we check to see that what the transition array is reporting and # what the reward arrar is reporting agree as to the number of actions # and states. If not then fail explaining the situation def checkSquareStochastic(Z): """Check if Z is a square stochastic matrix. Let S = number of states. Parameters ---------- Z : matrix This should be a two dimensional array with a shape of (S, S). It can possibly be sparse. Notes ---------- Returns None if no error has been detected, else it raises an error. """ # try to get the shape of the matrix try: s1, s2 = Z.shape except AttributeError: raise TypeError("Matrix should be a numpy type.") except ValueError: raise InvalidMDPError(mdperr["mat_square"]) # check that the matrix is square, and that each row sums to one assert s1 == s2, mdperr["mat_square"] assert (absolute(Z.sum(axis=1) - ones(s2))).max() < SMALLNUM, \ mdperr["mat_stoch"] # make sure that there are no values less than zero try: assert (Z >= 0).all(), mdperr["mat_nonneg"] except (AttributeError, TypeError): try: assert (Z.data >= 0).all(), mdperr["mat_nonneg"] except AttributeError: raise TypeError("Matrix should be a numpy type.") return(None) def getSpan(W): """Return the span of W sp(W) = max W(s) - min W(s) """ return (W.max() - W.min()) class Error(Exception): """Base class for exceptions in this module.""" def __init__(self): Exception.__init__(self) self.message = "PyMDPToolbox: " def __str__(self): return repr(self.message) class InvalidMDPError(Error): """Class for invalid definitions of a MDP.""" def __init__(self, msg): Error.__init__(self) self.message += msg self.args = tuple(msg) if __name__ == "__main__": import doctest doctest.testmod()