return(False,"You are using an object array for the transition probability: The array must have only 1 dimension: A. Each element of the contains a SxS array.")
return(False,"You are using an object array for the reward: The array must have only 1 dimension: A. Each element of the contains a SxS array.")
raiseTypeError(mdperr["R_ndarray"])
if(P.dtypeisobject):
if(P.dtype==object):
if(P.ndim>1):
raiseValueError(mdperr["obj_shape"])
else:
P_is_object=True
else:
if(P.ndim!=3):
raiseValueError(mdperr["P_shape"])
else:
P_is_object=False
if(R.dtypeisobject):
if(R.dtype==object):
if(R.ndim>1):
raiseValueError(mdperr["obj_shape"])
else:
R_is_object=True
else:
if(notR.ndimin(2,3)):
raiseValueError(mdperr["R_shape"])
else:
R_is_object=False
...
...
@@ -125,75 +288,66 @@ class MDP():
aP=P.shape[0]
sP0=P[0].shape[0]
sP1=P[0].shape[1]
# check to see that each object array element is the same size
# check to see that the other object array elements are the same shape
foraainrange(1,aP):
sP0aa=P[aa].shape[0]
sP1aa=P[aa].shape[1]
if((sP0aa!=sP0)or(sP1aa!=sP1)):
is_error_detected=True
err_msg="You are using and object array for the transition probability: The dimensions of each array within the object array must be equal to each other."
break
raiseValueError(mdperr["obj_square"])
else:
aP,sP0,sP1=P.shape
if((sP0<1)or(aP<1)or(sP0!=sP1)):
is_error_detected=True
err_msg="The transition probability array must have the shape (A, S, S) with S : number of states greater than 0 and A : number of actions greater than 0."
raiseValueError(mdperr["P_shape"])
if(notis_error_detected):
aa=1
whileaa<=aP:
foraainrange(aP):
ifP_is_object:
err_msg=self.checkSquareStochastic(P[aa])
self.checkSquareStochastic(P[aa])
else:
err_msg=self.checkSquareStochastic(P[aa,:,:])
if(err_msg==None):
self.checkSquareStochastic(P[aa,:,:])
aa=aa+1
else:
is_error_detected=True
aa=aP+1
if(notis_error_detected):
ifR_is_object:
aR=R.shape[0]
sR0=R[0].shape[0]
sR1=R[0].shape[1]
aR=R.shape[0]
elifR.ndim==3:
# check to see that the other object array elements are the same shape
foraainrange(1,aR):
sR0aa=R[aa].shape[0]
sR1aa=R[aa].shape[1]
if((sR0aa!=sR0)or(sR1aa!=sR1)):
raiseValueError(mdperr["obj_square"])
elif(R.ndim==3):
aR,sR0,sR1=R.shape
else:
sR0,aR=R.shape
sR1=sR0
if((sR0<1)or(aR<1)or(sR0!=sR1)):
is_error_detected=True
err_msg="MDP Toolbox ERROR: The reward matrix R must be an array (S,S,A) or (SxA) with S : number of states greater than 0 and A : number of actions greater than 0"
is_error_detected=True
raiseValueError(mdperr["R_shape"])
if(notis_error_detected):
if(sP0!=sR0)or(aP!=aR):
err_msg="MDP Toolbox ERROR: Incompatibility between P and R dimensions"