Commit 6fd9a818 authored by Steven Cordwell

tidy up user facing messages

parent df7f9e55
......@@ -66,6 +66,14 @@ from scipy.sparse import csr_matrix as sparse
from .utils import check, getSpan
MSG_STOP_MAX_ITER = "Iterating stopped due to maximum number of iterations " \
"condition."
MSG_STOP_EPSILON_OPTIMAL_POLICY = "Iterating stopped, epsilon-optimal " \
"policy found."
MSG_STOP_EPSILON_OPTIMAL_VALUE = "Iterating stopped, epsilon-optimal value " \
"function found."
MSG_STOP_UNCHANGING_POLICY = "Iterating stopped, unchanging policy found."
class MDP(object):
"""A Markov Decision Problem.
......@@ -158,7 +166,7 @@ class MDP(object):
assert 0.0 < self.discount <= 1.0, "Discount rate must be in ]0; 1]"
if self.discount == 1:
print("WARNING: check conditions of convergence. With no "
"discount, convergence is can not be assumed.")
"discount, convergence can not be assumed.")
# if the max_iter is None then the algorithm is assumed to not use it
# in its computations
if max_iter is not None:
......@@ -350,7 +358,7 @@ class FiniteHorizon(MDP):
def __init__(self, transitions, reward, discount, N, h=None):
# Initialise a finite horizon MDP.
self.N = int(N)
assert self.N > 0, 'PyMDPtoolbox: N must be greater than 0.'
assert self.N > 0, "N must be greater than 0."
# Initialise the base class
MDP.__init__(self, transitions, reward, discount, None, None)
# remove the iteration counter, it is not meaningful for backwards
......@@ -373,23 +381,19 @@ class FiniteHorizon(MDP):
# loop through each time period
for n in range(self.N):
W, X = self._bellmanOperator(self.V[:, self.N - n])
self.V[:, self.N - n - 1] = X
self.policy[:, self.N - n - 1] = W
stage = self.N - n - 1
self.V[:, stage] = X
self.policy[:, stage] = W
if self.verbose:
print(("stage: %s ... policy transpose : %s") % (
self.N - n, self.policy[:, self.N - n -1].tolist()))
print(("stage: %s, policy: %s") % (
stage, self.policy[:, stage].tolist()))
# update time spent running
self.time = time() - self.time
# After this we could create a tuple of tuples for the values and
# policies.
#V = []
#p = []
#for n in xrange(self.N):
# V.append()
# p.append()
#V.append()
#self.V = tuple(V)
#self.policy = tuple(p)
#self.V = tuple(tuple(self.V[:, n].tolist()) for n in range(self.N))
#self.policy = tuple(tuple(self.policy[:, n].tolist())
# for n in range(self.N))
class LP(MDP):
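For reference, here is a standalone sketch of the backward induction that the rewritten loop above performs, using the new stage index. It is illustrative only and assumes V has shape (S, N+1) with the terminal values h in column N, and policy has shape (S, N).
import numpy as np

def backward_induction(P, R, discount, N, h):
    # P: (A, S, S) transition matrices, R: (S, A) rewards, h: terminal values (S,)
    A, S, _ = P.shape
    V = np.zeros((S, N + 1))
    policy = np.zeros((S, N), dtype=int)
    V[:, N] = h
    for n in range(N):
        stage = N - n - 1
        # one-step lookahead: Q[a, s] = R[s, a] + discount * P[a] @ V[:, stage + 1]
        Q = np.array([R[:, a] + discount * P[a].dot(V[:, stage + 1])
                      for a in range(A)])
        V[:, stage] = Q.max(axis=0)
        policy[:, stage] = Q.argmax(axis=0)
    return V, policy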
......@@ -420,10 +424,6 @@ class LP(MDP):
time : float
used CPU time
Notes
-----
In verbose mode, displays the current stage and policy transpose.
Examples
--------
>>> import mdptoolbox, mdptoolbox.example
......@@ -585,10 +585,9 @@ class PolicyIteration(MDP):
elif eval_type in (1, "iterative"):
self.eval_type = "iterative"
else:
raise ValueError("PyMDPtoolbox: eval_type should be 0 for matrix "
"evaluation or 1 for iterative evaluation. "
"The strings 'matrix' and 'iterative' can also "
"be used.")
raise ValueError("'eval_type' should be '0' for matrix evaluation "
"or '1' for iterative evaluation. The strings "
"'matrix' and 'iterative' can also be used.")
# Call the iteration method
#self.run()
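A hedged usage sketch for the eval_type argument validated above; the constructor signature (eval_type accepted as a keyword alongside transitions, reward and discount) is assumed from the surrounding code rather than guaranteed.
import mdptoolbox, mdptoolbox.example

P, R = mdptoolbox.example.forest()
# matrix evaluation: solve the linear system for the policy's value directly
pi_matrix = mdptoolbox.mdp.PolicyIteration(P, R, 0.9, eval_type="matrix")
# iterative evaluation: repeated Bellman backups for the fixed policy
pi_iterative = mdptoolbox.mdp.PolicyIteration(P, R, 0.9, eval_type="iterative")
pi_iterative.run()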
......@@ -673,15 +672,15 @@ class PolicyIteration(MDP):
"'V0' must be a vector of length S."
policy_V = array(V0).reshape(self.S)
except AttributeError:
if len(V0) == self.S:
policy_V = array(V0).reshape(self.S)
else:
if V0 == 0:
policy_V = zeros(self.S)
else:
policy_V = array(V0).reshape(self.S)
policy_P, policy_R = self._computePpolicyPRpolicy()
if self.verbose:
print(' Iteration V_variation')
print(' Iteration\t\t V variation')
itr = 0
done = False
......@@ -693,19 +692,17 @@ class PolicyIteration(MDP):
variation = absolute(policy_V - Vprev).max()
if self.verbose:
print((' %s %s') % (itr, variation))
print((' %s\t\t %s') % (itr, variation))
# ensure |Vn - Vpolicy| < epsilon
if variation < ((1 - self.discount) / self.discount) * epsilon:
done = True
if self.verbose:
print("PyMDPtoolbox: iterations stopped, epsilon-optimal "
"value function.")
print(MSG_STOP_EPSILON_OPTIMAL_VALUE)
elif itr == max_iter:
done = True
if self.verbose:
print("PyMDPtoolbox: iterations stopped by maximum number "
"of iteration condition.")
print(MSG_STOP_MAX_ITER)
self.V = policy_V
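The loop above evaluates a fixed policy by repeatedly applying that policy's Bellman operator until the value change falls below (1 - discount) / discount times epsilon. A minimal numpy sketch of the same idea, under assumed shapes (P_pi is the (S, S) transition matrix induced by the policy, R_pi the (S,) reward vector); it is illustrative, not the toolbox code.
import numpy as np

def eval_policy_iterative(P_pi, R_pi, discount, epsilon=0.0001, max_iter=10000):
    V = np.zeros(P_pi.shape[0])
    for _ in range(max_iter):
        V_prev = V
        # one Bellman backup for the fixed policy
        V = R_pi + discount * P_pi.dot(V_prev)
        variation = np.abs(V - V_prev).max()
        # same epsilon scaling as the stopping test above
        if variation < ((1 - discount) / discount) * epsilon:
            break
    return V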
......@@ -738,7 +735,7 @@ class PolicyIteration(MDP):
# Run the policy iteration algorithm.
# If verbose then print a header
if self.verbose:
print(' Iteration Number_of_different_actions')
print(' Iteration\t\tNumber of different actions')
# Set up the while stopping condition and the current time
done = False
self.time = time()
......@@ -760,20 +757,17 @@ class PolicyIteration(MDP):
n_different = (policy_next != self.policy).sum()
# if verbose then continue printing a table
if self.verbose:
print((' %s %s') % (self.iter,
n_different))
print((' %s\t\t %s') % (self.iter, n_different))
# Once the policy is unchanging or the maximum number of
# iterations has been reached then stop
if n_different == 0:
done = True
if self.verbose:
print("PyMDPtoolbox: iterations stopped, unchanging "
"policy found.")
print(MSG_STOP_UNCHANGING_POLICY)
elif (self.iter == self.max_iter):
done = True
if self.verbose:
print("PyMDPtoolbox: iterations stopped by maximum number "
"of iteration condition.")
print(MSG_STOP_MAX_ITER)
else:
self.policy = policy_next
# update the time to return the computation time
......@@ -865,7 +859,7 @@ class PolicyIterationModified(PolicyIteration):
# Run the modified policy iteration algorithm.
if self.verbose:
print('\tIteration\tV-variation')
print(' \tIteration\t\tV-variation')
self.time = time()
......@@ -878,7 +872,7 @@ class PolicyIterationModified(PolicyIteration):
variation = getSpan(Vnext - self.V)
if self.verbose:
print(("\t%s\t%s" % (self.iter, variation)))
print((" %s\t\t %s" % (self.iter, variation)))
self.V = Vnext
if variation < self.thresh:
......@@ -973,8 +967,7 @@ class QLearning(MDP):
# The following check won't be done in MDP()'s initialisation, so let's
# do it here
self.max_iter = int(n_iter)
assert self.max_iter >= 10000, "PyMDPtoolbox: n_iter should be " \
"greater than 10000."
assert self.max_iter >= 10000, "'n_iter' should be greater than 10000."
# We don't want to send this to MDP because _computePR should not be
# run on it, so check that it defines an MDP
......@@ -1029,7 +1022,10 @@ class QLearning(MDP):
try:
r = self.R[a][s, s_new]
except IndexError:
try:
r = self.R[s, a]
except IndexError:
r = self.R[s]
# Updating the value of Q
# Decaying update coefficient (1/sqrt(n+2)) can be changed
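The nested try/except added above picks the reward lookup that matches how R is stored. Here is a hedged illustration with the lookup factored into a helper (the helper name is invented for this sketch); the comments describe the three reward layouts each branch handles.
def lookup_reward(R, s, a, s_new):
    try:
        return R[a][s, s_new]       # per-action (S, S) reward matrices
    except IndexError:
        try:
            return R[s, a]          # (S, A) state-action rewards
        except IndexError:
            return R[s]             # (S,) state-only rewards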
......@@ -1143,7 +1139,7 @@ class RelativeValueIteration(MDP):
done = False
if self.verbose:
print(' Iteration U_variation')
print(' Iteration\t\tU variation')
self.time = time()
......@@ -1157,20 +1153,18 @@ class RelativeValueIteration(MDP):
variation = getSpan(Vnext - self.V)
if self.verbose:
print((" %s %s" % (self.iter, variation)))
print((" %s\t\t %s" % (self.iter, variation)))
if variation < self.epsilon:
done = True
self.average_reward = self.gain + (Vnext - self.V).min()
if self.verbose:
print("MDP Toolbox : iterations stopped, epsilon-optimal "
"policy found.")
print(MSG_STOP_EPSILON_OPTIMAL_POLICY)
elif self.iter == self.max_iter:
done = True
self.average_reward = self.gain + (Vnext - self.V).min()
if self.verbose:
print("MDP Toolbox : iterations stopped by maximum "
"number of iteration condition.")
print(MSG_STOP_MAX_ITER)
self.V = Vnext
self.gain = float(self.V[self.S - 1])
......@@ -1296,7 +1290,7 @@ class ValueIteration(MDP):
24 0.00200550617512
25 0.00144396444609
26 0.0010396544012
PyMDPToolbox: iteration stopped, epsilon-optimal policy found.
Iterating stopped, epsilon-optimal policy found.
>>> vi.V
(40.048625392716815, 33.65371175967546)
>>> vi.policy
......@@ -1396,7 +1390,7 @@ class ValueIteration(MDP):
# Run the value iteration algorithm.
if self.verbose:
print('\tIteration\tV-variation')
print(' Iteration\t\tV-variation')
self.time = time()
while True:
......@@ -1413,16 +1407,15 @@ class ValueIteration(MDP):
variation = getSpan(self.V - Vprev)
if self.verbose:
print(("\t%s\t%s" % (self.iter, variation)))
print((" %s\t\t %s" % (self.iter, variation)))
if variation < self.thresh:
if self.verbose:
print("Iteration stopped, epsilon-optimal policy found.")
print(MSG_STOP_EPSILON_OPTIMAL_POLICY)
break
elif (self.iter == self.max_iter):
if self.verbose:
print("Iteration stopped by maximum number of iterations "
"condition.")
print(MSG_STOP_MAX_ITER)
break
# store value and policy as tuples
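For context, a standalone sketch of the value iteration loop above with the span-based stopping rule. getSpan(W) is assumed to be W.max() - W.min(), and the threshold epsilon * (1 - discount) / discount is the standard epsilon-optimality bound for discounted problems; this is illustrative, not the toolbox implementation.
import numpy as np

def value_iteration(P, R, discount, epsilon=0.01, max_iter=1000):
    A, S, _ = P.shape
    V = np.zeros(S)
    thresh = epsilon * (1 - discount) / discount if discount < 1 else epsilon
    for itr in range(1, max_iter + 1):
        # Bellman optimality backup over all actions
        Q = np.array([R[:, a] + discount * P[a].dot(V) for a in range(A)])
        V_next = Q.max(axis=0)
        # span semi-norm of the change, as getSpan is assumed to compute
        span = (V_next - V).max() - (V_next - V).min()
        V = V_next
        if span < thresh:
            break
    policy = Q.argmax(axis=0)
    return V, policy, itr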
......@@ -1524,7 +1517,7 @@ class ValueIterationGS(ValueIteration):
done = False
if self.verbose:
print(' Iteration V_variation')
print(' Iteration\t\tV-variation')
self.time = time()
......@@ -1543,18 +1536,16 @@ class ValueIterationGS(ValueIteration):
variation = getSpan(self.V - Vprev)
if self.verbose:
print((" %s %s" % (self.iter, variation)))
print((" %s\t\t %s" % (self.iter, variation)))
if variation < self.thresh:
done = True
if self.verbose:
print("Iterations stopped, epsilon-optimal policy found.")
print(MSG_STOP_EPSILON_OPTIMAL_POLICY)
elif self.iter == self.max_iter:
done = True
if self.verbose:
print("Iterations stopped by maximum number of iteration "
"condition.")
print(MSG_STOP_MAX_ITER)
self.policy = []
for s in range(self.S):
......