Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Zahra Rajabi
pymdptoolbox
Commits
c8bbc99c
Commit
c8bbc99c
authored
Aug 24, 2013
by
Steven Cordwell
Browse files
remove use of random module and replace with numpy.random
parent
eb40e9c6
Changes
3
Show whitespace changes
Inline
Side-by-side
setup.py
View file @
c8bbc99c
...
...
@@ -33,4 +33,4 @@ setup(name="pymdptoolbox",
packages=["mdptoolbox"],
package_dir={"": "src"},
requires=["math", "numpy", "random", "scipy", "time"],)
requires=["math", "numpy", "scipy", "time"],)
src/mdptoolbox/example.py
View file @
c8bbc99c
...
...
@@ -5,13 +5,11 @@ Created on Sun Aug 18 14:32:25 2013
@author: steve
"""
from random import random
from numpy import diag, ones, where, zeros
from numpy.random import rand, randint
from numpy.random import randint, random
from scipy.sparse import coo_matrix, dok_matrix
def exampleForest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
"""Generate a MDP example based on a simple forest management scenario.
This function is used to generate a transition probability
...
...
@@ -144,7 +142,7 @@ def exampleForest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
# we want to return the generated transition and reward matrices
return
(
P
,
R
)
def exampleRand(S, A, is_sparse=False, mask=None):
def rand(S, A, is_sparse=False, mask=None):
"""Generate a random Markov Decision Process.
Parameters
...
...
@@ -198,7 +196,7 @@ def exampleRand(S, A, is_sparse=False, mask=None):
RR = dok_matrix((S, S))
for s in xrange(S):
    if mask is None:
        m = rand(S)
        m = random(S)
        m[m <= 2/3.0] = 0
        m[m > 2/3.0] = 1
    elif mask.shape == (A, S, S):
...
...
@@ -210,9 +208,9 @@ def exampleRand(S, A, is_sparse=False, mask=None):
m[randint(0, S)] = 1
n = 1
cols = where(m)[0] # m[s, :]
vals = rand(n)
vals = random(n)
vals = vals / vals.sum()
reward = 2*rand(n) - ones(n)
reward = 2*random(n) - ones(n)
PP[s, cols] = vals
RR[s, cols] = reward
# PP.tocsr() takes the same amount of time as PP.tocoo().tocsr()
...
...
@@ -229,7 +227,7 @@ def exampleRand(S, A, is_sparse=False, mask=None):
for s in range(S):
    # create our own random mask if there is no user supplied one
    if mask is None:
        m = rand(S)
        m = random(S)
        r = random()
        m[m <= r] = 0
        m[m > r] = 1
...
...
@@ -241,8 +239,8 @@ def exampleRand(S, A, is_sparse=False, mask=None):
if m.sum() == 0:
    m[randint(0, S)] = 1
    n = 1
P[a][s] = m * rand(S)
P[a][s] = m * random(S)
P[a][s] = P[a][s] / P[a][s].sum()
R[a][s] = (m * (2*rand(S) - ones(S, dtype=int)))
R[a][s] = (m * (2*random(S) - ones(S, dtype=int)))
# we want to return the generated transition and reward matrices
return (P, R)
src/mdptoolbox/mdp.py
View file @
c8bbc99c
...
...
@@ -95,12 +95,11 @@ http://www.inra.fr/mia/T/MDPtoolbox/.
# POSSIBILITY OF SUCH DAMAGE.
from math import ceil, log, sqrt
from random import random
from time import time
from numpy import absolute, array, empty, mean, mod, multiply
from numpy import ndarray, ones, zeros
from numpy.random import randint
from numpy.random import randint, random
from scipy.sparse import csr_matrix as sparse
from utils import check, getSpan
...
...
@@ -991,10 +990,8 @@ class QLearning(MDP):
>>> # These examples are reproducible only if the random seed of the
>>> # numpy.random module is set to 0.
>>> import numpy as np
>>> import random
>>> import mdptoolbox, mdptoolbox.example
>>> np.random.seed(0)
>>> random.seed(0)
>>> P, R = mdptoolbox.example.forest()
>>> ql = mdptoolbox.mdp.QLearning(P, R, 0.96)
>>> ql.Q
...
...
@@ -1007,12 +1004,10 @@ class QLearning(MDP):
(0, 0, 0)
>>> import mdptoolbox
>>> import random
>>> import numpy as np
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> np.random.seed(0)
>>> random.seed(0)
>>> ql = mdptoolbox.mdp.QLearning(P, R, 0.9)
>>> ql.Q
array([[ 39.933691 , 43.17543338],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment