Commit 079751c4, authored Jan 09, 2015 by Steven Cordwell
parent 6ad0c1a6

[mdp] Import the util module as _util

Changes (1 file): src/mdptoolbox/mdp.py
@@ -28,7 +28,7 @@ ValueIterationGS
 """
-# Copyright (c) 2011-2013 Steven A. W. Cordwell
+# Copyright (c) 2011-2015 Steven A. W. Cordwell
 # Copyright (c) 2009 INRA
 #
 # All rights reserved.
@@ -63,11 +63,7 @@ import time as _time
 import numpy as _np
 import scipy.sparse as _sp
-try:
-    from .util import check, getSpan
-except ValueError:
-    # importing mdp as a module rather than as part of a package
-    from util import check, getSpan
+import mdptoolbox.util as _util
 
 _MSG_STOP_MAX_ITER = "Iterating stopped due to maximum number of iterations " \
     "condition."
@@ -182,7 +178,7 @@ class MDP(object):
         assert self.epsilon > 0, "Epsilon must be greater than 0."
         # we run a check on P and R to make sure they are describing an MDP. If
         # an exception isn't raised then they are assumed to be correct.
-        check(transitions, reward)
+        _util.check(transitions, reward)
         # computePR will assign the variables self.S, self.A, self.P and self.R
         self._computePR(transitions, reward)
         # the verbosity is by default turned off
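check validates that the transition and reward arguments actually describe an MDP before any computation runs. The sketch below shows the core of that kind of validation, assuming P arrives as an (A, S, S) array; the function name and error messages are illustrative, not the library's actual code:

import numpy as np

def check_sketch(P, R):
    # Each P[a] must be row-stochastic: non-negative entries,
    # with every row summing to 1.
    for a, Pa in enumerate(P):
        if (Pa < 0).any():
            raise ValueError("P[%d] has negative entries" % a)
        if not np.allclose(Pa.sum(axis=1), 1.0):
            raise ValueError("rows of P[%d] must each sum to 1" % a)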
@@ -269,7 +265,7 @@ class MDP(object):
         # PR(SxA) = reward matrix
         #
         # We assume that P and R define a MDP i,e. assumption is that
-        # check(P, R) has already been run and doesn't fail.
+        # _util.check(P, R) has already been run and doesn't fail.
         #
         # First compute store P, S, and A
         self._computeP(P)
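The "PR(SxA)" in the comment is the matrix of expected immediate rewards. When R is given per action as (S, S) matrices, the expected reward of taking action a in state s is the transition-weighted row sum. A hedged sketch of that computation (illustrative, not the library's _computePR):

import numpy as np

def compute_PR_sketch(P, R):
    # P, R: (A, S, S) arrays.  Returns the (S, A) matrix with
    # PR[s, a] = sum over s' of P[a, s, s'] * R[a, s, s'].
    return np.array([(P[a] * R[a]).sum(axis=1) for a in range(len(P))]).T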
@@ -866,7 +862,7 @@ class PolicyIterationModified(PolicyIteration):
             self.policy, Vnext = self._bellmanOperator()
             #[Ppolicy, PRpolicy] = mdp_computePpolicyPRpolicy(P, PR, policy);
-            variation = getSpan(Vnext - self.V)
+            variation = _util.getSpan(Vnext - self.V)
             if self.verbose:
                 print(("    %s\t\t  %s" % (self.iter, variation)))
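The stopping tests in this and the following classes all compare the span of successive value-function differences against a threshold. getSpan is, in essence, a one-liner; a plausible reading of what lives in mdptoolbox/util.py, hedged:

import numpy as np

def getSpan(W):
    # span of a vector: distance between its largest and smallest entries
    return W.max() - W.min()

The span is used rather than a max-norm because, for discounted problems, the span-based stopping rule already guarantees an epsilon-optimal policy once the span drops below epsilon * (1 - discount) / discount (cf. Puterman 1994, section 6.6).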
@@ -969,7 +965,7 @@ class QLearning(MDP):
         # We don't want to send this to MDP because _computePR should not be
         # run on it, so check that it defines an MDP
-        check(transitions, reward)
+        _util.check(transitions, reward)
         # Store P, S, and A
         self._computeP(transitions)
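QLearning skips MDP's _computePR deliberately: it only needs the transitions as a simulator, so the constructor validates the inputs with _util.check and stores P directly. A hypothetical usage sketch, following pymdptoolbox's documented API (exact method names may differ at this commit):

import mdptoolbox.example
import mdptoolbox.mdp

P, R = mdptoolbox.example.forest()
ql = mdptoolbox.mdp.QLearning(P, R, 0.96)   # discount factor 0.96
ql.run()
print(ql.policy)   # greedy policy derived from the learned Q table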
@@ -1143,7 +1139,7 @@ class RelativeValueIteration(MDP):
             self.policy, Vnext = self._bellmanOperator()
             Vnext = Vnext - self.gain
-            variation = getSpan(Vnext - self.V)
+            variation = _util.getSpan(Vnext - self.V)
             if self.verbose:
                 print(("    %s\t\t  %s" % (self.iter, variation)))
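RelativeValueIteration targets the average-reward criterion, so no discount factor shrinks the iterates; instead each sweep recentres the value function by subtracting the current gain estimate, which is what the Vnext = Vnext - self.gain line above does. Schematically, in a standard formulation (cf. Puterman 1994, ch. 8; not copied from the library):

\[
V_{n+1} = \max_a \left( R_a + P_a V_n \right) - g_n \mathbf{1},
\qquad \text{stop when } \operatorname{span}(V_{n+1} - V_n) < \epsilon .
\]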
@@ -1344,8 +1340,9 @@ class ValueIteration(MDP):
         Vprev = self.V
         null, value = self._bellmanOperator()
         # p 201, Proposition 6.6.5
+        span = _util.getSpan(value - Vprev)
         max_iter = (_math.log((epsilon * (1 - self.discount) / self.discount) /
-                    getSpan(value - Vprev)) /
-                    _math.log(self.discount * k))
+                    span) /
+                    _math.log(self.discount * k))
         #self.V = Vprev
         self.max_iter = int(_math.ceil(max_iter))
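This hunk is the one change in the commit that is more than a rename: the span is computed once, stored, and reused, rather than called inline. The quantity being bounded is the iteration count from Proposition 6.6.5 of Puterman (1994), which the code transcribes directly:

\[
\text{max\_iter} = \left\lceil
\frac{\log\!\bigl(\epsilon (1 - \gamma)/\gamma \,\big/\, \operatorname{span}(V^{1} - V^{0})\bigr)}
     {\log(\gamma k)}
\right\rceil
\]

where gamma is self.discount and k is a constant computed earlier in the same method (not shown in this hunk).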
@@ -1368,7 +1365,7 @@ class ValueIteration(MDP):
             # The values, based on Q. For the function "max()": the option
             # "axis" means the axis along which to operate. In this case it
             # finds the maximum of the the rows. (Operates along the columns?)
-            variation = getSpan(self.V - Vprev)
+            variation = _util.getSpan(self.V - Vprev)
             if self.verbose:
                 print(("    %s\t\t      %s" % (self.iter, variation)))
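The quoted comment hedges about which axis "max()" operates along; a two-line numpy example settles it. For an (A, S) array of Q-values, max(axis=0) collapses the action axis, returning the best action's value for each state (shapes here are hypothetical, matching the (A, S) layout the comment describes):

import numpy as np

Q = np.array([[1.0, 5.0],    # Q[a, s]: rows are actions, columns are states
              [3.0, 2.0]])
print(Q.max(axis=0))         # [3. 5.] : best value per state (max over actions)
print(Q.argmax(axis=0))      # [1 0]   : the corresponding greedy policy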
@@ -1495,7 +1492,7 @@ class ValueIterationGS(ValueIteration):
                 self.V[s] = max(Q)
-            variation = getSpan(self.V - Vprev)
+            variation = _util.getSpan(self.V - Vprev)
             if self.verbose:
                 print(("    %s\t\t      %s" % (self.iter, variation)))
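ValueIterationGS differs from the parent class in exactly the way this hunk's context shows: values are written back into self.V one state at a time, so later states in the same sweep already see the updated values of earlier ones (the Gauss-Seidel idea). A minimal sketch of one sweep under assumed (A, S, S) and (S, A) shapes, illustrative rather than the library's code:

import numpy as np

def gauss_seidel_sweep(P, R, V, discount):
    # P: (A, S, S) transitions, R: (S, A) rewards, V: (S,) float values,
    # updated in place and returned.
    A, S = len(P), len(V)
    for s in range(S):
        # V already contains this sweep's updates for states < s
        Q = [R[s, a] + discount * P[a][s, :].dot(V) for a in range(A)]
        V[s] = max(Q)
    return V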