Commit 969abd92 authored by Steven Cordwell's avatar Steven Cordwell
Browse files

work on adding more documentation

parent af739800
......@@ -256,7 +256,9 @@ class MDP(object):
def _bellmanOperator(self, V=None):
# Apply the Bellman operator on the value function.
# Updates the value function and the Vprev-improving policy.
# Returns: (policy, value), tuple of new policy and its value
# If V hasn't been sent into the method, then we assume to be working
......@@ -271,7 +273,10 @@ class MDP(object):
raise ValueError("bellman: V is not the right shape.")
except AttributeError:
raise TypeError("bellman: V must be a numpy array or matrix.")
# Looping through each action the Q-value matrix is calculated
# Looping through each action, the Q-value matrix is calculated.
# P and V can be any object that supports indexing, so it is important
# that you know they define a valid MDP before calling the
# _bellmanOperator method. Otherwise the results will be meaningless.
Q = empty((self.A, self.S))
for aa in range(self.A):
Q[aa] = self.R[aa] + self.discount * self.P[aa].dot(V)
......@@ -8,7 +8,7 @@ Created on Sun Aug 18 14:30:09 2013
from numpy import absolute, ones
def check(P, R):
"""Check if P and R define a Markov Decision Process.
"""Check if P and R define a valid Markov Decision Process (MDP).
Let S = number of states, A = number of actions.
......@@ -29,6 +29,17 @@ def check(P, R):
Raises an error if P and R do not define a MDP.
>>> import mdptoolbox, mdptoolbox.example
>>> P_valid, R_valid = mdptoolbox.example.rand(100, 5)
>>> mdptoolbox.utils.check(P_valid, R_valid) # Nothing should happen
>>> import numpy as np
>>> P_invalid = np.random.rand(5, 100, 100)
>>> mdptoolbox.utils.check(P_invalid, R_valid)
# Checking P
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment