Zahra Rajabi / pymdptoolbox / Commits

Commit 6909d0bf
authored Sep 11, 2013 by Steven Cordwell

result of 2to3 conversion

parent 64fd9d8b

Changes: 12 files
src/mdptoolbox/__init__.py
...
@@ -78,7 +78,7 @@ http://www.inra.fr/mia/T/MDPtoolbox/.
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
-import mdp
+from . import mdp
 if __name__ == "__main__":
     import doctest
...
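The switch from `import mdp` to `from . import mdp` is needed because Python 3 dropped implicit relative imports: a bare `import mdp` inside the mdptoolbox package now only searches sys.path, not the package itself. A minimal sketch of the two forms (the package and module names mirror the ones in this commit; the absolute variant is an added illustration, not part of the diff):

# mdptoolbox/__init__.py -- minimal sketch, not the full file

# Python 2 only: implicit relative import, resolved against the package
# import mdp

# Python 2 and 3: explicit relative import, always resolved against mdptoolbox
from . import mdp

# An absolute import would also work and is equally explicit:
# from mdptoolbox import mdp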
src/mdptoolbox/example.py
...
@@ -146,11 +146,11 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
     # | p 0 0....0 1-p |   | 1 0..........0 |
     if is_sparse:
         P = []
-        rows = range(S) * 2
-        cols = [0] * S + range(1, S) + [S - 1]
+        rows = list(range(S)) * 2
+        cols = [0] * S + list(range(1, S)) + [S - 1]
         vals = [p] * S + [1 - p] * S
         P.append(coo_matrix((vals, (rows, cols)), shape=(S, S)).tocsr())
-        rows = range(S)
+        rows = list(range(S))
         cols = [0] * S
         vals = [1] * S
         P.append(coo_matrix((vals, (rows, cols)), shape=(S, S)).tocsr())
...
@@ -223,13 +223,13 @@ def rand(S, A, is_sparse=False, mask=None):
         P = [None] * A
         # definition of reward matrix (values between -1 and +1)
         R = [None] * A
-        for a in xrange(A):
+        for a in range(A):
             # it may be more efficient to implement this by constructing lists
             # of rows, columns and values then creating a coo_matrix, but this
             # works for now
             PP = dok_matrix((S, S))
             RR = dok_matrix((S, S))
-            for s in xrange(S):
+            for s in range(S):
                 if mask is None:
                     m = random(S)
                     m[m <= 2/3.0] = 0
...
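The `list(range(...))` wrappers in `forest()` are needed because Python 3's `range()` returns a lazy range object instead of a list, so sequence operations such as repetition (`*`) and concatenation (`+`) raise a TypeError. A small standalone illustration of the pattern, using a placeholder S rather than the toolbox code itself:

S = 3

# Python 2: range(S) is a list, so repetition and concatenation just work.
# Python 3: range(S) is a range object, and both lines below would raise
# TypeError without the explicit list() conversion.
rows = list(range(S)) * 2                     # [0, 1, 2, 0, 1, 2]
cols = [0] * S + list(range(1, S)) + [S - 1]  # [0, 0, 0, 1, 2, 2]

print(rows, cols)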
src/mdptoolbox/mdp.py
...
@@ -65,7 +65,7 @@ from numpy import ndarray, ones, zeros
 from numpy.random import randint, random
 from scipy.sparse import csr_matrix as sparse
-from utils import check, getSpan
+from .utils import check, getSpan
 class MDP(object):
...
@@ -236,10 +236,10 @@ class MDP(object):
                                 for aa in range(self.A)])
             else:
                 self.R = tuple([multiply(P[aa], R[aa]).sum(1).reshape(self.S)
-                                for aa in xrange(self.A)])
+                                for aa in range(self.A)])
         except AttributeError:
             self.R = tuple([multiply(P[aa], R[aa]).sum(1).reshape(self.S)
-                            for aa in xrange(self.A)])
+                            for aa in range(self.A)])
     def run(self):
         # Raise error because child classes should implement this function.
...
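Most of the mechanical changes in this commit replace `xrange` with `range`: Python 3 removed `xrange`, and its `range` already behaves like Python 2's `xrange` (a lazy sequence). Where a loop only iterates, the swap is a pure rename, as in the sketch below (the `n_actions` name is a hypothetical stand-in for self.A):

n_actions = 4  # hypothetical count, standing in for self.A

# Python 2: for aa in xrange(n_actions):   -- lazy, no list built
# Python 3: xrange is gone; range is already lazy, so renaming it is enough.
for aa in range(n_actions):
    pass  # per-action work would go here

# Only when an actual list is needed (indexing, repetition, concatenation)
# does 2to3 have to wrap the call, as in example.py above:
actions = list(range(n_actions))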
@@ -337,8 +337,8 @@ class FiniteHorizon(MDP):
             self.V[:, self.N - n - 1] = X
             self.policy[:, self.N - n - 1] = W
             if self.verbose:
-                print("stage: %s ... policy transpose : %s") % (
-                    self.N - n, self.policy[:, self.N - n - 1].tolist())
+                print(("stage: %s ... policy transpose : %s") % (
+                    self.N - n, self.policy[:, self.N - n - 1].tolist()))
         # update time spent running
         self.time = time() - self.time
         # After this we could create a tuple of tuples for the values and
...
@@ -643,7 +643,7 @@ class PolicyIteration(MDP):
             variation = absolute(policy_V - Vprev).max()
             if self.verbose:
-                print(' %s %s') % (itr, variation)
+                print((' %s %s') % (itr, variation))
             # ensure |Vn - Vpolicy| < epsilon
             if variation < ((1 - self.discount) / self.discount) * epsilon:
...
@@ -710,8 +710,8 @@ class PolicyIteration(MDP):
             n_different = (policy_next != self.policy).sum()
             # if verbose then continue printing a table
             if self.verbose:
-                print(' %s %s') % (self.iter, n_different)
+                print((' %s %s') % (self.iter, n_different))
             # Once the policy is unchanging of the maximum number of
             # of iterations has been reached then stop
             if n_different == 0:
...
@@ -836,7 +836,7 @@ class PolicyIterationModified(PolicyIteration):
             variation = getSpan(Vnext - self.V)
             if self.verbose:
-                print("\t%s\t%s" % (self.iter, variation))
+                print(("\t%s\t%s" % (self.iter, variation)))
             self.V = Vnext
             if variation < self.thresh:
...
@@ -1114,7 +1114,7 @@ class RelativeValueIteration(MDP):
             variation = getSpan(Vnext - self.V)
             if self.verbose:
-                print(" %s %s" % (self.iter, variation))
+                print((" %s %s" % (self.iter, variation)))
             if variation < self.epsilon:
                 done = True
...
@@ -1383,7 +1383,7 @@ class ValueIteration(MDP):
             variation = getSpan(self.V - Vprev)
             if self.verbose:
-                print("\t%s\t%s" % (self.iter, variation))
+                print(("\t%s\t%s" % (self.iter, variation)))
             if variation < self.thresh:
                 if self.verbose:
...
@@ -1509,7 +1509,7 @@ class ValueIterationGS(ValueIteration):
             variation = getSpan(self.V - Vprev)
             if self.verbose:
-                print(" %s %s" % (self.iter, variation))
+                print((" %s %s" % (self.iter, variation)))
             if variation < self.thresh:
                 done = True
...
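The extra parentheses 2to3 adds around the verbose `print` calls in FiniteHorizon and PolicyIteration are not cosmetic. In Python 2, `print("... %s ...") % (values)` was a print statement whose expression was the already-formatted string, so the `%` applied before printing. In Python 3, `print(...)` is a function returning None, so the trailing `% (values)` would raise a TypeError; wrapping the whole formatting expression inside the call restores the original behaviour. A standalone illustration with hypothetical values:

itr, variation = 3, 0.0125  # hypothetical values for illustration

# Python 2 (statement): the % formatting bound to the string first, so this
# printed the formatted text:
#   print('  %s  %s') % (itr, variation)
#
# Python 3 (function): print(...) returns None, so the trailing % would raise
# TypeError.  2to3 therefore moves the formatting inside the call:
print(('  %s  %s') % (itr, variation))

# Equivalent and slightly tidier:
print('  %s  %s' % (itr, variation))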
src/mdptoolbox/utils.py
...
@@ -132,7 +132,7 @@ def check(P, R):
     try:
         aP = len(P)
         sP0, sP1 = P[0].shape
-        for aa in xrange(1, aP):
+        for aa in range(1, aP):
             sP0aa, sP1aa = P[aa].shape
             if (sP0aa != sP0) or (sP1aa != sP1):
                 raise InvalidMDPError(mdperr["obj_square"])
...
@@ -172,7 +172,7 @@ def check(P, R):
     if (sP0 != sR0) or (aP != aR):
         raise InvalidMDPError(mdperr["PR_incompat"])
     # Check that the P's are square and stochastic
-    for aa in xrange(aP):
+    for aa in range(aP):
         checkSquareStochastic(P[aa])
         #checkSquareStochastic(P[aa, :, :])
     # We are at the end of the checks, so if no exceptions have been raised
...
src/tests/test_MDP.py
...
@@ -9,7 +9,7 @@ import numpy as np
 import mdptoolbox, mdptoolbox.example
-from utils import SMALLNUM, P_small, R_small
+from .utils import SMALLNUM, P_small, R_small
 def test_MDP_P_R_1():
     P1 = []
...
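The same change repeats across all the test modules below: their shared fixtures live in the sibling module src/tests/utils.py, and under Python 3 that module has to be imported relative to the test package. A general consequence of explicit relative imports (an observation about Python, not something stated in this commit) is that such a test file can no longer be run directly as a top-level script, only imported as part of its package by a test runner:

# Sketch of the import change in a test module such as src/tests/test_MDP.py.

# Python 2 only (implicit relative import of the sibling utils.py):
# from utils import SMALLNUM, P_small, R_small

# Python 2 and 3 (explicit relative import; works when the tests are
# collected as a package by a test runner, but raises an error if the file
# is executed directly as a script):
from .utils import SMALLNUM, P_small, R_small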
src/tests/test_PolicyIteration.py
...
@@ -9,8 +9,8 @@ import numpy as np
 import mdptoolbox
-from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
-from utils import P_forest_sparse, R_forest_sparse
+from .utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
+from .utils import P_forest_sparse, R_forest_sparse
 def test_PolicyIteration_init_policy0():
     sdp = mdptoolbox.mdp.PolicyIteration(P_small, R_small, 0.9)
...
src/tests/test_QLearning.py
...
@@ -9,8 +9,8 @@ import numpy as np
 import mdptoolbox
-from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
-from utils import R_forest_sparse, P_small, R_small, P_sparse
+from .utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
+from .utils import R_forest_sparse, P_small, R_small, P_sparse
 def test_QLearning_small():
     np.random.seed(0)
...
src/tests/test_RelativeValueIteration.py
...
@@ -9,8 +9,8 @@ import numpy as np
 import mdptoolbox
-from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
-from utils import R_forest_sparse, P_small, R_small, P_sparse
+from .utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
+from .utils import R_forest_sparse, P_small, R_small, P_sparse
 def test_RelativeValueIteration_small():
     sdp = mdptoolbox.mdp.RelativeValueIteration(P_small, R_small)
...
src/tests/test_ValueIteration.py
...
@@ -9,9 +9,9 @@ import numpy as np
 import mdptoolbox
-from utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
-from utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse
-from utils import P_small, R_small, P_sparse
+from .utils import SMALLNUM, P_forest, R_forest, P_forest_sparse
+from .utils import R_forest_sparse, P_rand, R_rand, P_rand_sparse, R_rand_sparse
+from .utils import P_small, R_small, P_sparse
 def test_ValueIteration_small():
     sdp = mdptoolbox.mdp.ValueIteration(P_small, R_small, 0.9)
...
src/tests/test_ValueIterationGS.py
...
@@ -9,8 +9,8 @@ import numpy as np
 import mdptoolbox
-from utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
-from utils import P_forest_sparse, R_forest_sparse
+from .utils import SMALLNUM, P_forest, R_forest, P_small, R_small, P_sparse
+from .utils import P_forest_sparse, R_forest_sparse
 def test_ValueIterationGS_small():
     sdp = mdptoolbox.mdp.ValueIterationGS(P_small, R_small, 0.9)
...
src/tests/test_examples.py
...
@@ -9,8 +9,8 @@ import numpy as np
 import mdptoolbox.example
-from utils import ACTIONS, STATES, P_forest, R_forest, P_rand, R_rand
-from utils import P_rand_sparse, R_rand_sparse
+from .utils import ACTIONS, STATES, P_forest, R_forest, P_rand, R_rand
+from .utils import P_rand_sparse, R_rand_sparse
 def test_exampleForest_P_shape():
     assert (P_forest == np.array([[[0.1, 0.9, 0.0],
...
src/tests/test_utils.py
...
@@ -10,7 +10,7 @@ import scipy as sp
 import mdptoolbox
-from utils import ACTIONS, STATES
+from .utils import ACTIONS, STATES
 def test_check_square_stochastic_nonnegative_array_1():
     P = np.zeros((ACTIONS, STATES, STATES))
...
@@ -55,21 +55,21 @@ def test_check_P_square_stochastic_nonnegative_object_sparse():
 def test_check_P_square_stochastic_nonnegative_list_array():
     P = []
     R = np.random.rand(STATES, ACTIONS)
-    for a in xrange(ACTIONS):
+    for a in range(ACTIONS):
         P.append(np.eye(STATES))
     assert (mdptoolbox.utils.check(P, R) == None)
 def test_check_P_square_stochastic_nonnegative_list_matrix():
     P = []
     R = np.random.rand(STATES, ACTIONS)
-    for a in xrange(ACTIONS):
+    for a in range(ACTIONS):
         P.append(np.matrix(np.eye(STATES)))
     assert (mdptoolbox.utils.check(P, R) == None)
 def test_check_P_square_stochastic_nonnegative_list_sparse():
     P = []
     R = np.random.rand(STATES, ACTIONS)
-    for a in xrange(ACTIONS):
+    for a in range(ACTIONS):
         P.append(sp.sparse.eye(STATES, STATES).tocsr())
     assert (mdptoolbox.utils.check(P, R) == None)
...
@@ -78,21 +78,21 @@ def test_check_P_square_stochastic_nonnegative_list_sparse():
 def test_check_P_square_stochastic_nonnegative_dict_array():
     P = {}
     R = np.random.rand(STATES, ACTIONS)
-    for a in xrange(ACTIONS):
+    for a in range(ACTIONS):
         P[a] = np.eye(STATES)
     assert (mdptoolbox.utils.check(P, R) == None)
 def test_check_P_square_stochastic_nonnegative_dict_matrix():
     P = {}
     R = np.random.rand(STATES, ACTIONS)
-    for a in xrange(ACTIONS):
+    for a in range(ACTIONS):
         P[a] = np.matrix(np.eye(STATES))
     assert (mdptoolbox.utils.check(P, R) == None)
 def test_check_P_square_stochastic_nonnegative_dict_sparse():
     P = {}
     R = np.random.rand(STATES, ACTIONS)
-    for a in xrange(ACTIONS):
+    for a in range(ACTIONS):
         P[a] = sp.sparse.eye(STATES, STATES).tocsr()
     assert (mdptoolbox.utils.check(P, R) == None)
...
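For context, these tests feed `mdptoolbox.utils.check` several container types for P (list or dict of numpy arrays, numpy matrices, or scipy sparse matrices) and expect it to return None, i.e. raise nothing, for valid input. A small usage sketch in the spirit of these tests; the STATES and ACTIONS values here are hypothetical stand-ins for the shared fixtures:

import numpy as np
import mdptoolbox.utils

STATES, ACTIONS = 10, 3  # hypothetical stand-ins for the test fixtures

# P as a list of one square stochastic matrix per action; R as (S, A) rewards.
P = [np.eye(STATES) for a in range(ACTIONS)]
R = np.random.rand(STATES, ACTIONS)

# check() returns None (and raises nothing) for valid input, which is exactly
# what the assertions in test_utils.py rely on.
assert mdptoolbox.utils.check(P, R) is None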