Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
b7bfb004
Commit
b7bfb004
authored
Jun 21, 2013
by
Steven Cordwell
Browse files
define the iteration methods as 'private' and call them from the __init__ function
parent
429a16b4
Changes
1
Show whitespace changes
Inline
Side-by-side
mdp.py
View file @
b7bfb004
...
...
@@ -818,9 +818,9 @@ class MDP(object):
raise
self
.
R
=
tuple
(
self
.
R
)
def
iterate
(
self
):
def
_
iterate
(
self
):
"""Raise error because child classes should implement this function."""
raise
NotImplementedError
(
"You should create an iterate() method."
)
raise
NotImplementedError
(
"You should create an _iterate() method."
)
def
setSilent
(
self
):
"""Set the MDP algorithm to silent mode."""
...
...
@@ -903,7 +903,10 @@ class FiniteHorizon(MDP):
if
h
is
not
None
:
self
.
V
[:,
N
]
=
h
def
iterate
(
self
):
# Call the iteration method
self
.
_iterate
()
def
_iterate
(
self
):
"""Run the finite horizon algorithm."""
self
.
time
=
time
()
...
...
@@ -979,7 +982,10 @@ class LP(MDP):
if
not
self
.
verbose
:
solvers
.
options
[
'show_progress'
]
=
False
def
iterate
(
self
):
# Call the iteration method
self
.
_iterate
()
def
_iterate
(
self
):
"""Run the linear programming algorithm."""
self
.
time
=
time
()
# The objective is to resolve : min V / V >= PR + discount*P*V
...
...
@@ -1051,7 +1057,6 @@ class PolicyIteration(MDP):
>>> import mdp
>>> P, R = mdp.exampleRand(5, 3)
>>> pi = mdp.PolicyIteration(P, R, 0.9)
>>> pi.iterate()
"""
...
...
@@ -1100,6 +1105,9 @@ class PolicyIteration(MDP):
"The strings 'matrix' and 'iterative' can also "
"be used."
)
# Call the iteration method
self
.
_iterate
()
def
_computePpolicyPRpolicy
(
self
):
"""Compute the transition matrix and the reward matrix for a policy.
...
...
@@ -1243,7 +1251,7 @@ class PolicyIteration(MDP):
self
.
V
=
self
.
_lin_eq
(
(
self
.
_speye
(
self
.
S
,
self
.
S
)
-
self
.
discount
*
Ppolicy
),
Rpolicy
)
def
iterate
(
self
):
def
_
iterate
(
self
):
"""Run the policy iteration algorithm."""
if
self
.
verbose
:
...
...
@@ -1369,7 +1377,10 @@ class PolicyIterationModified(PolicyIteration):
# min(min()) is not right
self
.
V
=
1
/
(
1
-
discount
)
*
self
.
R
.
min
()
*
ones
((
self
.
S
,
1
))
def
iterate
(
self
):
# Call the iteration method
self
.
_iterate
()
def
_iterate
(
self
):
"""Run the modified policy iteration algorithm."""
if
self
.
verbose
:
...
...
@@ -1448,7 +1459,6 @@ class QLearning(MDP):
>>> random.seed(0)
>>> P, R = mdp.exampleForest()
>>> ql = mdp.QLearning(P, R, 0.96)
>>> ql.iterate()
>>> ql.Q
array([[ 68.80977389, 46.62560314],
[ 72.58265749, 43.1170545 ],
...
...
@@ -1465,7 +1475,6 @@ class QLearning(MDP):
>>> R = np.array([[5, 10], [-1, 2]])
>>> random.seed(0)
>>> ql = mdp.QLearning(P, R, 0.9)
>>> ql.iterate()
>>> ql.Q
array([[ 36.63245946, 42.24434307],
[ 35.96582807, 32.70456417]])
...
...
@@ -1511,7 +1520,10 @@ class QLearning(MDP):
self
.
Q
=
zeros
((
self
.
S
,
self
.
A
))
self
.
mean_discrepancy
=
[]
def
iterate
(
self
):
# Call the iteration method
self
.
_iterate
()
def
_iterate
(
self
):
"""Run the Q-learning algorithm."""
discrepancy
=
[]
...
...
@@ -1613,7 +1625,6 @@ class RelativeValueIteration(MDP):
>>> import mdp
>>> P, R = exampleForest()
>>> rvi = mdp.RelativeValueIteration(P, R)
>>> rvi.iterate()
>>> rvi.average_reward
2.4300000000000002
>>> rvi.policy
...
...
@@ -1625,8 +1636,7 @@ class RelativeValueIteration(MDP):
>>> import numpy as np
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdp.RelativeValueIteration(P, R)
>>> rvi.iterate()
>>> rvi = mdp.RelativeValueIteration(P, R)
>>> rvi.V
(10.0, 3.885235246411831)
>>> rvi.average_reward
...
...
@@ -1651,7 +1661,10 @@ class RelativeValueIteration(MDP):
self
.
average_reward
=
None
def
iterate
(
self
):
# Call the iteration method
self
.
_iterate
()
def
_iterate
(
self
):
"""Run the relative value iteration algorithm."""
done
=
False
...
...
@@ -1743,10 +1756,10 @@ class ValueIteration(MDP):
---------------
V : value function
A vector which stores the optimal value function. Prior to calling the
iterate() method it has a value of None. Shape is (S, ).
_iterate() method it has a value of None. Shape is (S, ).
policy : epsilon-optimal policy
A vector which stores the optimal policy. Prior to calling the
iterate() method it has a value of None. Shape is (S, ).
_iterate() method it has a value of None. Shape is (S, ).
iter : number of iterations taken to complete the computation
An integer
time : used CPU time
...
...
@@ -1754,8 +1767,6 @@ class ValueIteration(MDP):
Methods
-------
iterate()
Starts the loop for the algorithm to be completed.
setSilent()
Sets the instance to silent mode.
setVerbose()
...
...
@@ -1774,7 +1785,6 @@ class ValueIteration(MDP):
>>> vi = mdp.ValueIteration(P, R, 0.96)
>>> vi.verbose
False
>>> vi.iterate()
>>> vi.V
(5.93215488, 9.38815488, 13.38815488)
>>> vi.policy
...
...
@@ -1789,7 +1799,6 @@ class ValueIteration(MDP):
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdp.ValueIteration(P, R, 0.9)
>>> vi.iterate()
>>> vi.V
(40.04862539271682, 33.65371175967546)
>>> vi.policy
...
...
@@ -1807,7 +1816,6 @@ class ValueIteration(MDP):
>>> P[1] = sparse([[0, 1],[0.1, 0.9]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdp.ValueIteration(P, R, 0.9)
>>> vi.iterate()
>>> vi.V
(40.04862539271682, 33.65371175967546)
>>> vi.policy
...
...
@@ -1846,6 +1854,9 @@ class ValueIteration(MDP):
# threshold of variation for V for an epsilon-optimal policy
self
.
thresh
=
epsilon
# Call the iteration method
self
.
_iterate
()
def
_boundIter
(
self
,
epsilon
):
"""Compute a bound for the number of iterations.
...
...
@@ -1895,7 +1906,7 @@ class ValueIteration(MDP):
self
.
max_iter
=
int
(
ceil
(
max_iter
))
def
iterate
(
self
):
def
_
iterate
(
self
):
"""Run the value iteration algorithm."""
if
self
.
verbose
:
...
...
@@ -1982,8 +1993,10 @@ class ValueIterationGS(ValueIteration):
ValueIteration
.
__init__
(
self
,
transitions
,
reward
,
discount
,
epsilon
,
max_iter
,
initial_value
)
# Call the iteration method
self
.
_iterate
()
def
iterate
(
self
):
def
_
iterate
(
self
):
"""Run the value iteration Gauss-Seidel algorithm."""
done
=
False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment