Commit 31712f2c authored by Steven Cordwell

added class FiniteHorizon

parent 0f855481
......@@ -484,9 +484,68 @@ class MDP(object):
self.verbose = True
class FiniteHorizon(MDP):
    """Resolution of finite-horizon MDP with backwards induction.

    Arguments
    ---------
    Let S = number of states, A = number of actions
    P = transition matrices, given either as
        an array with 3 dimensions (SxSxA), or
        a list/tuple of length A, each item being a matrix (SxS),
        possibly sparse
    R = reward matrix, given either as
        an array with 3 dimensions (SxSxA), or
        a list/tuple of length A, each item being a matrix (SxS),
        possibly sparse, or
        a 2D array (SxA), possibly sparse
    discount = discount factor, in ]0, 1]
    N = number of periods, must be greater than 0
    h(S) = terminal reward, optional (default is a vector of S zeros)

    Evaluation
    ----------
    Call iterate() to run the backwards induction. Afterwards:
    V(S,N+1) = optimal value function
        V[:, n] = optimal value function at stage n
            with stage in 0, ..., N-1
        V[:, N] = value function for terminal stage
    policy(S,N) = optimal policy
        policy[:, n] = optimal policy at stage n
            with stage in 0, ..., N-1
        actions are numbered from 0
    time = elapsed wall-clock time of iterate(), in seconds

    Raises
    ------
    ValueError if N < 1, if discount is outside ]0, 1], or if P / R do not
    have one of the layouts described above.

    Notes
    -----
    In verbose mode, displays the current stage and policy transpose.
    """

    def __init__(self, P, R, discount, N, h=None):
        # Function-scope import keeps this block self-contained regardless of
        # which numpy names the module imports at the top of the file.
        import numpy as np

        if N < 1:
            raise ValueError('MDP Toolbox ERROR: N must be upper than 0')
        if discount <= 0 or discount > 1:
            raise ValueError('MDP Toolbox ERROR: Discount rate must be in ]0; 1]')

        # Normalise P to a list of A matrices of shape (S, S).
        self.P = self._per_action(P)
        self.A = len(self.P)
        self.S = self.P[0].shape[0]
        self.N = N
        self.discount = discount

        # Column N holds the terminal values; columns 0..N-1 are filled in
        # by iterate().
        self.V = np.zeros((self.S, N + 1))
        self.policy = np.zeros((self.S, N), dtype=int)
        if h is not None:
            self.V[:, N] = np.asarray(h, dtype=float).reshape(-1)

        # Expected immediate reward for each (state, action) pair, shape (S, A).
        self.PR = self._compute_pr(self.P, R)

    @staticmethod
    def _per_action(M):
        """Return M as a list holding one (SxS) matrix per action."""
        import numpy as np

        if isinstance(M, np.ndarray):
            if M.ndim == 3:
                # (S, S, A) layout: the action is the last axis.
                return [M[:, :, a] for a in range(M.shape[2])]
            if M.ndim == 1 and M.dtype == object:
                # Object array used as a container of per-action matrices.
                M = list(M)
        if isinstance(M, (list, tuple)) and len(M) > 0:
            return [m if hasattr(m, 'shape') else np.asarray(m) for m in M]
        raise ValueError('MDP Toolbox ERROR: expected an (SxSxA) array or a '
                         'sequence of A (SxS) matrices')

    @staticmethod
    def _expected_reward(p, r):
        """Return sum over s' of p[s, s'] * r[s, s'] as a vector of length S."""
        import numpy as np

        # scipy.sparse matrices expose an element-wise .multiply(); dense
        # arrays do not, so fall back to numpy for them.
        if hasattr(p, 'multiply'):
            weighted = p.multiply(r)
        elif hasattr(r, 'multiply'):
            weighted = r.multiply(p)
        else:
            weighted = np.multiply(p, r)
        return np.asarray(weighted.sum(axis=1), dtype=float).reshape(-1)

    @classmethod
    def _compute_pr(cls, P, R):
        """Return the (SxA) matrix of expected immediate rewards."""
        import numpy as np

        per_transition = isinstance(R, (list, tuple)) or (
            isinstance(R, np.ndarray) and (R.ndim == 3 or R.dtype == object))
        if per_transition:
            rewards = cls._per_action(R)
            if len(rewards) != len(P):
                raise ValueError('MDP Toolbox ERROR: P and R must describe '
                                 'the same number of actions')
            return np.column_stack(
                [cls._expected_reward(p, r) for p, r in zip(P, rewards)])
        # R is already (SxA); densify if it is sparse.
        dense = R.toarray() if hasattr(R, 'toarray') else R
        return np.asarray(dense, dtype=float)

    def iterate(self):
        """Run backwards induction, filling self.V and self.policy."""
        import numpy as np
        from time import time

        started = time()

        # Walk from the last decision stage back to the first; each stage
        # only needs the value function of the stage after it.
        for stage in range(self.N - 1, -1, -1):
            next_value = self.V[:, stage + 1]
            Q = np.empty((self.S, self.A))
            for a, p in enumerate(self.P):
                # reshape(-1) flattens the (1, S) result numpy.matrix gives.
                future = np.asarray(p.dot(next_value)).reshape(-1)
                Q[:, a] = self.PR[:, a] + self.discount * future
            self.V[:, stage] = Q.max(axis=1)
            self.policy[:, stage] = Q.argmax(axis=1)

            # self.verbose is owned by the base class; tolerate its absence.
            if getattr(self, 'verbose', False):
                print('stage: %d policy transpose : %s'
                      % (stage, self.policy[:, stage]))

        self.time = time() - started
class LP(MDP):
"""Resolution of discounted MDP with linear programming.
......@@ -1087,7 +1146,7 @@ class ValueIterationGS(MDP):
Examples
--------
"""
"""
def __init__(self, transitions, reward, discount, epsilon=0.01, max_iter=10, initial_value=0):
""""""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment