Commit 3ef9f28e authored by Steven Cordwell
Browse files

clean up conversion to tuples to use x.getA1() rather than array(x).reshape()

parent 6dbe527c
......@@ -964,8 +964,8 @@ class PolicyIteration(MDP):
self.time = time() - self.time
# store value and policy as tuples
self.V = tuple(array(self.V).reshape(self.S).tolist())
self.policy = tuple(array(self.policy).reshape(self.S).tolist())
self.V = tuple(self.V.getA1().tolist())
self.policy = tuple(self.policy.getA1().tolist())
class PolicyIterationModified(PolicyIteration):
"""Resolution of discounted MDP with policy iteration algorithm
......@@ -1209,6 +1209,10 @@ class QLearning(MDP):
self.time = time() - self.time
# convert V and policy to tuples
self.V = tuple(self.V.getA1().tolist())
self.policy = tuple(self.policy.getA1().tolist())
# rather than report that we have not done any iterations, assign the
# value of max_iter to self.iter
self.iter = self.max_iter
......@@ -1516,8 +1520,8 @@ class ValueIteration(MDP):
print("...iterations stopped by maximum number of iteration condition")
# store value and policy as tuples
self.V = tuple(array(self.V).reshape(self.S).tolist())
self.policy = tuple(array(self.policy).reshape(self.S).tolist())
self.V = tuple(self.V.getA1().tolist())
self.policy = tuple(self.policy.getA1().tolist())
self.time = time() - self.time
......@@ -1610,5 +1614,5 @@ class ValueIterationGS(ValueIteration):
self.time = time() - self.time
self.V = tuple(array(self.V).reshape(self.S).tolist())
self.V = tuple(self.V.getA1().tolist())
self.policy = tuple(self.policy)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment