Commit a76b938e by Steven Cordwell

parent 5b516cdf
 ... @@ -312,7 +312,17 @@ class MDP(object): ... @@ -312,7 +312,17 @@ class MDP(object): if (not type(R) is ndarray): if (not type(R) is ndarray): raise TypeError(mdperr["R_type"]) raise TypeError(mdperr["R_type"]) # NumPy has an array type of 'object', which is roughly equivalent to # the MATLAB cell array. These are most useful for storing sparse # matrices as these can only have two dimensions whereas we want to be # able to store a transition matrix for each action. If the dytpe of # the transition probability array is object then we store this as # P_is_object = True. # If it is an object array, then it should only have one dimension # otherwise fail with a message expalining why. # If it is a normal array then the number of dimensions must be exactly # three, otherwise fail with a message explaining why. if (P.dtype == object): if (P.dtype == object): if (P.ndim > 1): if (P.ndim > 1): raise ValueError(mdperr["obj_shape"]) raise ValueError(mdperr["obj_shape"]) ... @@ -323,7 +333,9 @@ class MDP(object): ... @@ -323,7 +333,9 @@ class MDP(object): raise ValueError(mdperr["P_shape"]) raise ValueError(mdperr["P_shape"]) else: else: P_is_object = False P_is_object = False # As above but for the reward array. A difference is that the reward # array can have either two or 3 dimensions. if (R.dtype == object): if (R.dtype == object): if (R.ndim > 1): if (R.ndim > 1): raise ValueError(mdperr["obj_shape"]) raise ValueError(mdperr["obj_shape"]) ... @@ -335,50 +347,99 @@ class MDP(object): ... @@ -335,50 +347,99 @@ class MDP(object): else: else: R_is_object = False R_is_object = False # We want to make sure that the transition probability array and the # reward array are in agreement. This means that both should show that # there are the same number of actions and the same number of states. # Furthermore the probability of transition matrices must be SxS in # shape, so we check for that also. if P_is_object: if P_is_object: # If the user has put their transition matrices into a numpy array # with dtype of 'object', then it is possible that they have made a # mistake and not all of the matrices are of the same shape. So, # here we record the number of actions and states that the first # matrix in element zero of the object array says it has. After # that we check that every other matrix also reports the same # number of actions and states, otherwise fail with an error. # aP: the number of actions in the transition array. This # corresponds to the number of elements in the object array. aP = P.shape[0] aP = P.shape[0] sP0 = P[0].shape[0] # sP0: the number of states as reported by the number of rows of sP1 = P[0].shape[1] # the transition matrix # check to see that the other object array elements are the same shape # sP1: the number of states as reported by the number of columns of # the transition matrix sP0, sP1 = P[0].shape # Now we check to see that every element of the object array holds # a matrix of the same shape, otherwise fail. for aa in range(1, aP): for aa in range(1, aP): sP0aa = P[aa].shape[0] # sp0aa and sp1aa represents the number of states in each sP1aa = P[aa].shape[1] # subsequent element of the object array. If it doesn't match # what was found in the first element, then we need to fail # telling the user what needs to be fixed. sP0aa, sP1aa = P[aa].shape if ((sP0aa != sP0) or (sP1aa != sP1)): if ((sP0aa != sP0) or (sP1aa != sP1)): raise ValueError(mdperr["obj_square"]) raise ValueError(mdperr["obj_square"]) else: else: # if we are using a normal array for this, then the first # dimension should be the number of actions, and the second and # third should be the number of states aP, sP0, sP1 = P.shape aP, sP0, sP1 = P.shape # the first dimension of the transition matrix must report the same # number of states as the second dimension. If not then we are not # dealing with a square matrix and it is not a valid transition # probability. Also, if the number of actions is less than one, or the # number of states is less than one, then it also is not a valid # transition probability. if ((sP0 < 1) or (aP < 1) or (sP0 != sP1)): if ((sP0 < 1) or (aP < 1) or (sP0 != sP1)): raise ValueError(mdperr["P_shape"]) raise ValueError(mdperr["P_shape"]) # now we check that each transition matrix is square-stochastic. For # object arrays this is the matrix held in each element, but for # normal arrays this is a matrix formed by taking a slice of the array for aa in range(aP): for aa in range(aP): if P_is_object: if P_is_object: self.checkSquareStochastic(P[aa]) self.checkSquareStochastic(P[aa]) else: else: self.checkSquareStochastic(P[aa, :, :]) self.checkSquareStochastic(P[aa, :, :]) aa = aa + 1 # aa = aa + 1 # why was this here? if R_is_object: if R_is_object: # if the rewarad array has an object dtype, then we check that # each element contains a matrix of the same shape as we did # above with the transition array. aR = R.shape[0] aR = R.shape[0] sR0 = R[0].shape[0] sR0, sR1 = R[0].shape sR1 = R[0].shape[1] # check to see that the other object array elements are the same shape for aa in range(1, aR): for aa in range(1, aR): sR0aa = R[aa].shape[0] sR0aa, sR1aa = R[aa].shape sR1aa = R[aa].shape[1] if ((sR0aa != sR0) or (sR1aa != sR1)): if ((sR0aa != sR0) or (sR1aa != sR1)): raise ValueError(mdperr["obj_square"]) raise ValueError(mdperr["obj_square"]) elif (R.ndim == 3): elif (R.ndim == 3): # This indicates that the reward matrices are constructed per # transition, so that the first dimension is the actions and # the second two dimensions are the states. aR, sR0, sR1 = R.shape aR, sR0, sR1 = R.shape else: else: # then the reward matrix is per state, so the first dimension is # the states and the second dimension is the actions. sR0, aR = R.shape sR0, aR = R.shape # this is added just so that the next check doesn't error out # saying that sR1 doesn't exist sR1 = sR0 sR1 = sR0 # the number of actions must be more than zero, the number of states # must also be more than 0, and the states must agree if ((sR0 < 1) or (aR < 1) or (sR0 != sR1)): if ((sR0 < 1) or (aR < 1) or (sR0 != sR1)): raise ValueError(mdperr["R_shape"]) raise ValueError(mdperr["R_shape"]) # now we check to see that what the transition array is reporting and # what the reward arrar is reporting agree as to the number of actions # and states. If not then fail explaining the situation if (sP0 != sR0) or (aP != aR): if (sP0 != sR0) or (aP != aR): raise ValueError(mdperr["PR_incompat"]) raise ValueError(mdperr["PR_incompat"]) # We are at the end of the checks, so if no exceptions have been raised # then that means there are (hopefullly) no errors and we return None return None def checkSquareStochastic(self, Z): def checkSquareStochastic(self, Z): """Check if Z is a square stochastic matrix """Check if Z is a square stochastic matrix ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!