Commit 31712f2c by Steven Cordwell

parent 0f855481
 # (diff context, kept verbatim) ... ... @@ -484,9 +484,68 @@ class MDP(object): self.verbose = True
class FiniteHorizon(MDP):
    """Resolution of finite-horizon MDP with backwards induction.

    Arguments
    ---------
    Let S = number of states, A = number of actions
    P(SxSxA) = transition matrix
        P could be an array with 3 dimensions or a sequence of length A
        (list or tuple), each element containing a matrix (SxS), possibly
        sparse
    R(SxSxA) or (SxA) = reward matrix
        R could be an array with 3 dimensions (SxSxA) or a sequence of
        length A, each element containing a sparse matrix (SxS), or a 2D
        array (SxA), possibly sparse
    discount = discount factor, in ]0, 1]
    N = number of periods, greater than 0
    h(S) = terminal reward, optional (default [0; 0; ... 0])

    Evaluation
    ----------
    V(S,N+1) = optimal value function
        V[:, n] = optimal value function at stage n, with n in 0, ..., N-1
        V[:, N] = value function for the terminal stage
    policy(S,N) = optimal policy
        policy[:, n] = optimal policy at stage n, with n in 0, ..., N-1
    time = time spent in iterate(), as measured with time()

    Raises
    ------
    ValueError
        If N is smaller than 1 or discount is outside ]0, 1].

    Notes
    -----
    The constructor only validates the arguments and allocates V and
    policy; call iterate() to run the backwards induction.
    In verbose mode, displays the current stage and policy transpose.

    """

    def __init__(self, P, R, discount, N, h=None):
        if N < 1:
            raise ValueError('MDP Toolbox ERROR: N must be upper than 0')
        if discount <= 0 or discount > 1:
            raise ValueError(
                'MDP Toolbox ERROR: Discount rate must be in ]0; 1]')

        # NOTE(review): the base-class initialiser is not visible from here
        # and the original code did not call it -- confirm whether
        # MDP.__init__ should be invoked.

        # The number of states is the first dimension of a transition matrix.
        # NOTE(review): this follows the (S, S, A) layout given in the
        # docstring -- confirm against the rest of the module.
        if isinstance(P, (list, tuple)):
            S = P[0].shape[0]
        else:
            S = P.shape[0]

        self.P = P
        self.discount = discount
        self.N = N

        # One value column per stage plus one for the terminal stage, and
        # one policy column per decision stage.
        # assumes `zeros` is numpy.zeros -- TODO confirm the module import
        self.V = zeros((S, N + 1))
        self.policy = zeros((S, N), dtype=int)

        # The terminal reward fills the last column; it stays at zero when
        # h is omitted.
        if h is not None:
            self.V[:, N] = h

        # NOTE(review): mdp_computePR is the helper name used by the original
        # code; it is defined outside this view -- confirm it exists here.
        self.PR = mdp_computePR(P, R)

    def iterate(self):
        """Run the backwards induction, filling self.V and self.policy.

        Stages are processed from the last decision stage down to the
        first; each stage is computed from the value function of the stage
        that follows it. The elapsed time is stored in self.time.
        """
        self.time = time()

        for stage in range(self.N - 1, -1, -1):
            # NOTE(review): mdp_bellman_operator is the helper name used by
            # the original code and is assumed to return (value, policy) in
            # that order -- confirm against its definition.
            W, X = mdp_bellman_operator(self.P, self.PR, self.discount,
                                        self.V[:, stage + 1])
            self.V[:, stage] = W
            self.policy[:, stage] = X
            # `verbose` may be unset because the base initialiser is not
            # called here, hence the defensive lookup. Stages are displayed
            # 1-based, as in the original commented-out display code.
            if getattr(self, 'verbose', False):
                print('stage: %s policy transpose : %s'
                      % (stage + 1, self.policy[:, stage]))

        self.time = time() - self.time

# (diff context, kept verbatim) class LP(MDP): """Resolution of discounted MDP with linear programming. ... ...
@@ -1087,7 +1146,7 @@ class ValueIterationGS(MDP): Examples -------- """ """ def __init__(self, transitions, reward, discount, epsilon=0.01, max_iter=10, initial_value=0): """""" ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!