### finite horizon algorithm code is fixed up

```New issue
Summary: the input 'h' to FiniteHorizon is not checked to be a sane value
Labels: Priority-Low
This should be checked to make sure it is a number. Perhaps it could also be a vector with a value for each state?```
parent 57517657
 ... ... @@ -480,7 +480,7 @@ class MDP(object): self.V = None self.policy = None def bellmanOperator(self): def bellmanOperator(self, V=None): """ Applies the Bellman operator on the value function. ... ... @@ -490,9 +490,16 @@ class MDP(object): ------- (policy, value) : tuple of new policy and its value """ # this V should be a reference to the data rather than a copy if V == None: V = self.V else: if not ((type(V) in (ndarray, matrix)) and (V.shape == (self.S, 1))): raise ValueError("V in bellmanOperator needs to be correct.") Q = matrix(zeros((self.S, self.A))) for aa in range(self.A): Q[:, aa] = self.R[:, aa] + (self.discount * self.P[aa] * self.V) Q[:, aa] = self.R[:, aa] + (self.discount * self.P[aa] * V) # Which way is better? if choose the first way, then the classes that # call this function must be changed ... ... @@ -616,23 +623,28 @@ class FiniteHorizon(MDP): else: self.N = N MDP.__init__(self, transitions, reward, discount, None) MDP.__init__(self, transitions, reward, discount, None, None) # remove the iteration counter del self.iter self.V = zeros((self.S, N + 1)) self.V = zeros(self.S, N + 1) self.policy = zeros((self.S, N), dtype=int) if not h is None: self.V[:, N + 1] = h self.V[:, N] = h def iterate(self): """""" self.time = time() for n in range(self.N - 1): W, X = self.bellmanOperator(self.P, self.R, self.discount, self.V[:, self.N - n + 1]) self.V[:, self.N - n] = W self.policy[:, self.N - n] = X for n in range(self.N): W, X = self.bellmanOperator(matrix(self.V[:, self.N - n]).reshape(self.S, 1)) self.V[:, self.N - n - 1] = X.A1 self.policy[:, self.N - n - 1] = W.A1 if self.verbose: print("stage: %s ... policy transpose : %s") % (self.N - n, self.policy[:, self.N - n].T) print("stage: %s ... policy transpose : %s") % (self.N - n, self.policy[:, self.N - n -1].tolist()) self.time = time() - self.time ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!