Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
7938265d
Commit
7938265d
authored
Jun 21, 2013
by
Steven Cordwell
Browse files
fix several docstring examples
parent
f3a07274
Changes
1
Hide whitespace changes
Inline
Side-by-side
mdp.py
View file @
7938265d
...
@@ -1046,12 +1046,13 @@ class PolicyIteration(MDP):
...
@@ -1046,12 +1046,13 @@ class PolicyIteration(MDP):
>>> import mdp
>>> import mdp
>>> P, R = mdp.exampleRand(5, 3)
>>> P, R = mdp.exampleRand(5, 3)
>>> pi = mdp.PolicyIteration(P, R, 0.9)
>>> pi = mdp.PolicyIteration(P, R, 0.9)
>>> P, R = mdp.exampleForest()
>>> P, R = mdp.exampleForest()
>>> pi = mdp.PolicyIteration(P, R, 0.9)
>>> pi = mdp.PolicyIteration(P, R, 0.9)
>>> pi.V
>>> pi.V
(26.244000000000018, 29.48400000000002, 33.484000000000016)
>>> pi.policy
>>> pi.policy
(0, 0, 0)
"""
"""
def
__init__
(
self
,
transitions
,
reward
,
discount
,
policy0
=
None
,
def
__init__
(
self
,
transitions
,
reward
,
discount
,
policy0
=
None
,
...
@@ -1444,32 +1445,37 @@ class QLearning(MDP):
...
@@ -1444,32 +1445,37 @@ class QLearning(MDP):
Examples
Examples
---------
---------
>>> import random # this example is reproducible only if random seed is set
>>> # These examples are reproducible only if random seed is set to 0 in
>>> # both the random and numpy.random modules.
>>> import numpy as np
>>> import random
>>> import mdp
>>> import mdp
>>> np.random.seed(0)
>>> random.seed(0)
>>> random.seed(0)
>>> P, R = mdp.exampleForest()
>>> P, R = mdp.exampleForest()
>>> ql = mdp.QLearning(P, R, 0.96)
>>> ql = mdp.QLearning(P, R, 0.96)
>>> ql.Q
>>> ql.Q
array([[ 68.80977389, 46.62560314],
array([[ 68.38037354, 43.24888454],
[ 72.58265749, 43.1170545],
[ 72.37777922, 42.75549145],
[ 77.1332834 , 65.01737419]])
[ 77.02892702, 64.68712932]])
>>> ql.V
>>> ql.V
(68.80977388561172, 72.5826574913828, 77.13328339600116)
(68.38037354422798, 72.37777921607258, 77.02892701616531)
>>> ql.policy
>>> ql.policy
(0, 0, 0)
(0, 0, 0)
>>> import random # this example is reproducible only if random seed is set
>>> import mdp
>>> import mdp
>>> import random
>>> import numpy as np
>>> import numpy as np
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> np.random.seed(0)
>>> random.seed(0)
>>> random.seed(0)
>>> ql = mdp.QLearning(P, R, 0.9)
>>> ql = mdp.QLearning(P, R, 0.9)
>>> ql.Q
>>> ql.Q
array([[ 36.63245946, 42.24434307],
array([[ 39.933691 , 43.17543338],
[ 35.96582807, 32.70456417]])
[ 36.94394224, 35.42568056]])
>>> ql.V
>>> ql.V
(42.24434307022128, 35.96582807367007)
(43.17543338090149, 36.943942243204454)
>>> ql.policy
>>> ql.policy
(1, 0)
(1, 0)
...
@@ -1790,7 +1796,7 @@ class ValueIteration(MDP):
...
@@ -1790,7 +1796,7 @@ class ValueIteration(MDP):
>>> R = np.array([[5, 10], [-1, 2]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdp.ValueIteration(P, R, 0.9)
>>> vi = mdp.ValueIteration(P, R, 0.9)
>>> vi.V
>>> vi.V
(40.04862539271682, 33.65371175967546)
(40.048625392716815, 33.65371175967546)
>>> vi.policy
>>> vi.policy
(1, 0)
(1, 0)
>>> vi.iter
>>> vi.iter
...
@@ -1807,7 +1813,7 @@ class ValueIteration(MDP):
...
@@ -1807,7 +1813,7 @@ class ValueIteration(MDP):
>>> R = np.array([[5, 10], [-1, 2]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdp.ValueIteration(P, R, 0.9)
>>> vi = mdp.ValueIteration(P, R, 0.9)
>>> vi.V
>>> vi.V
(40.04862539271682, 33.65371175967546)
(40.048625392716815, 33.65371175967546)
>>> vi.policy
>>> vi.policy
(1, 0)
(1, 0)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment