Commit 938d603f authored by Steven Cordwell

Fixed ValueIteration._boundIter() so that it accepts dense matrices; also added some unit tests.

parent 88c696ae
Copyright (c) 2011, 2012, 2013 Steven Cordwell
Copyright (c) 2009, Iadine Chadès
Copyright (c) 2009, Marie-Josée Cros
Copyright (c) 2009, Frédérick Garcia
Copyright (c) 2009, Régis Sabbadin
Copyright (c) 2011-2013 Steven A. W. Cordwell
Copyright (c) 2009 INRA
All rights reserved.
......
......@@ -13,18 +13,19 @@ doc folder or from `the MDPtoolbox homepage <http://www.>`_.
Installation
------------
1. Download the stable release from http:// or get a local copy of the
source with Git
1. Download the latest stable release from
`http://code.google.com/p/pymdptoolbox/downloads/list`_ or clone the
Git repository
``git clone https://code.google.com/p/pymdptoolbox/``
2. If you downloaded the ``*.zip`` or ``*.tar.gz`` archive, then extract it
``tar -xzvf pymdptoolbox.tar.gz``
``unzip pymdptoolbox``
``tar -xzvf pymdptoolbox-<VERSION>.tar.gz``
``unzip pymdptoolbox-<VERSION>``
3. Change to the MDP toolbox directory
``cd pymdptoolbox``
4. Install via Docutils either to the filesystem or to a home directory
4. Install via Distutils, either to the filesystem or to a home directory
``python setup.py install``
``python setup.py install --home=<dir>``
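A quick way to check the install is to solve one of the bundled examples from
an interactive session (a minimal sketch; ``exampleForest`` and
``ValueIteration`` are the names used elsewhere in this module, and with a
discount of 0.96 the expected policy is the one shown):

>>> import mdp
>>> P, R = mdp.exampleForest()
>>> vi = mdp.ValueIteration(P, R, 0.96)
>>> vi.iterate()
>>> vi.policy
(0, 0, 0)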
......
......@@ -56,13 +56,15 @@ The documentation can be displayed with
the ValueIteration class use ``mdp.ValueIteration?<ENTER>``, and to view its
source code use ``mdp.ValueIteration??<ENTER>``.
Acknowledgments
---------------
This module is modified from the MDPtoolbox (c) 2009 INRA available at
`http://www.inra.fr/mia/T/MDPtoolbox/`_.
"""
# Copyright (c) 2011, 2012, 2013 Steven Cordwell
# Copyright (c) 2009, Iadine Chadès
# Copyright (c) 2009, Marie-Josée Cros
# Copyright (c) 2009, Frédérick Garcia
# Copyright (c) 2009, Régis Sabbadin
# Copyright (c) 2011-2013 Steven A. W. Cordwell
# Copyright (c) 2009 INRA
#
# All rights reserved.
#
......@@ -1548,20 +1550,29 @@ class RelativeValueIteration(MDP):
--------
>>> import mdp
>>> P, R = exampleForest()
>>> rvi = mdp.RelativeValueIteration(P, R, 0.96)
>>> rvi = mdp.RelativeValueIteration(P, R)
>>> rvi.iterate()
>>> rvi.average_reward
2.4300000000000002
>>> rvi.policy
(0, 0, 0)
>>> rvi.iter
4
>>> import mdp
>>> import numpy as np
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdp.RelativeValueIteration(P, R, 0.9)
>>> rvi = mdp.RelativeValueIteration(P, R)
>>> rvi.iterate()
>>> rvi.V
(10.0, 3.885235246411831)
>>> rvi.average_reward
3.8852352464118312
>>> rvi.policy
(1, 0)
>>> rvi.iter
29
"""
......@@ -1729,7 +1740,7 @@ class ValueIteration(MDP):
>>> import mdp
>>> import numpy as np
>>> from scipy.sparse import csr_matrix as sparse
>>> P = np.zeros((2, ), dtype=object)
>>> P = np.empty(2, dtype=object)
>>> P[0] = sparse([[0.5, 0.5],[0.8, 0.2]])
>>> P[1] = sparse([[0, 1],[0.1, 0.9]])
>>> R = np.array([[5, 10], [-1, 2]])
......@@ -1799,7 +1810,13 @@ class ValueIteration(MDP):
for ss in range(self.S):
    PP = matrix(zeros((self.S, self.A)))
    for aa in range(self.A):
        try:
            # dense arrays and matrices support direct column assignment
            PP[:, aa] = self.P[aa][:, ss]
        except ValueError:
            # sparse matrices raise ValueError above, so convert
            # the column to dense first
            PP[:, aa] = self.P[aa][:, ss].todense()
    # the function "min()" without any arguments finds the
    # minimum of the entire array.
    h[ss] = PP.min()
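Why the fallback is needed can be seen with a toy transition matrix (a
sketch; it assumes, as the ``except`` clause above does, that assigning a
scipy.sparse column into a dense matrix raises ValueError):

import numpy as np
from scipy.sparse import csr_matrix

P_dense = np.matrix([[0.5, 0.5], [0.8, 0.2]])
P_sparse = csr_matrix(P_dense)
PP = np.matrix(np.zeros((2, 1)))

PP[:, 0] = P_dense[:, 0]        # a dense column assigns directly
try:
    PP[:, 0] = P_sparse[:, 0]   # a sparse column raises ValueError...
except ValueError:
    PP[:, 0] = P_sparse[:, 0].todense()   # ...so densify it first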
......
# -*- coding: utf-8 -*-
from distutils.core import setup
setup(name="PyMDPtoolbox",
version="4.0alpha1",
description="Markov Decision Process (MDP) Toolbox 4.0",
setup(name="pymdptoolbox",
version="4.0a1",
author="Steven A. W. Cordwell",
author_email="steven.cordwell@uqconnect.edu.au",
url="http://code.google.com/p/pymdptoolbox/",
license="New BSD License",
description="Markov Decision Process (MDP) Toolbox",
long_description="The MDP toolbox provides classes and functions for "
"the resolution of descrete-time Markov Decision Processes. The list of "
"algorithms that have been implemented includes backwards induction, "
"linear programming, policy iteration, q-learning and value iteration "
"along with several variations.",
download_url="http://code.google.com/p/pymdptoolbox/downloads/list",
classifiers=[
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Software Development :: Libraries :: Python Modules"],
platforms=["Any"],
license="New BSD",
py_modules=["mdp"],
requires=["numpy", "scipy"],)
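With the metadata above, distutils can echo it back as a quick sanity check;
for example ``python setup.py --name --version`` should print
``pymdptoolbox`` and ``4.0a1`` on separate lines.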
# -*- coding: utf-8 -*-
"""
Created on Sun May 27 23:16:57 2012
"""The Python Markov Decision Process (MDP) Toolbox Test Suite
===========================================================
These unit tests are written for the nosetests framework. You will need to have
nosetests installed, and can then run the tests from the command line:
$ cd /path/to/pymdptoolbox
$ nosetests
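Nose can also run a single test function by naming it after the module with a
colon, e.g. (assuming this file is importable as ``test_mdp``; substitute the
actual module name)
$ nosetests test_mdp:test_RelativeValueIteration_dense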
@author: -
"""
from random import seed as randseed
from numpy import absolute, array, eye, matrix, zeros
from numpy import absolute, array, empty, eye, matrix, zeros
from numpy.random import rand
from scipy.sparse import eye as speye
from scipy.sparse import csr_matrix as sparse
#from scipy.stats.distributions import poisson
from mdp import check, checkSquareStochastic, exampleForest, exampleRand, LP
from mdp import check, checkSquareStochastic, exampleForest, exampleRand
from mdp import MDP, PolicyIteration, QLearning, RelativeValueIteration
from mdp import ValueIteration, ValueIterationGS
......@@ -24,6 +29,9 @@ SMALLNUM = 10e-12
# Arrays
P = array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
R = array([[5, 10], [-1, 2]])
Ps = empty(2, dtype=object)
Ps[0] = sparse([[0.5, 0.5],[0.8, 0.2]])
Ps[1] = sparse([[0, 1],[0.1, 0.9]])
Pf, Rf = exampleForest()
Pr, Rr = exampleRand(STATES, ACTIONS)
Prs, Rrs = exampleRand(STATES, ACTIONS, is_sparse=True)
......@@ -48,21 +56,21 @@ def test_check_square_stochastic_nonnegative_array_2():
# check: P - square, stochastic and non-negative object arrays
def test_check_P_square_stochastic_nonnegative_object_array():
P = zeros((ACTIONS, ), dtype=object)
P = empty(ACTIONS, dtype=object)
R = rand(STATES, ACTIONS)
for a in range(ACTIONS):
P[a] = eye(STATES)
assert (check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_object_matrix():
P = zeros((ACTIONS, ), dtype=object)
P = empty(ACTIONS, dtype=object)
R = rand(STATES, ACTIONS)
for a in range(ACTIONS):
P[a] = matrix(eye(STATES))
assert (check(P, R) == None)
def test_check_P_square_stochastic_nonnegative_object_sparse():
P = zeros((ACTIONS, ), dtype=object)
P = empty(ACTIONS, dtype=object)
R = rand(STATES, ACTIONS)
for a in range(ACTIONS):
P[a] = speye(STATES, STATES).tocsr()
......@@ -81,7 +89,7 @@ def test_check_R_square_stochastic_nonnegative_sparse():
def test_check_R_square_stochastic_nonnegative_object_array():
P = zeros((ACTIONS, STATES, STATES))
R = zeros((ACTIONS, ), dtype=object)
R = empty(ACTIONS, dtype=object)
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[a] = rand(STATES, STATES)
......@@ -89,7 +97,7 @@ def test_check_R_square_stochastic_nonnegative_object_array():
def test_check_R_square_stochastic_nonnegative_object_matrix():
P = zeros((ACTIONS, STATES, STATES))
R = zeros((ACTIONS, ), dtype=object)
R = empty(ACTIONS, dtype=object)
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[a] = matrix(rand(STATES, STATES))
......@@ -97,7 +105,7 @@ def test_check_R_square_stochastic_nonnegative_object_matrix():
def test_check_R_square_stochastic_nonnegative_object_sparse():
P = zeros((ACTIONS, STATES, STATES))
R = zeros((ACTIONS, ), dtype=object)
R = empty(ACTIONS, dtype=object)
for a in range(ACTIONS):
P[a, :, :] = eye(STATES)
R[a] = sparse(rand(STATES, STATES))
......@@ -194,7 +202,7 @@ def test_MDP_P_R_1():
def test_MDP_P_R_2():
R = array([[[5, 10], [-1, 2]], [[1, 2], [3, 4]]])
P1 = zeros((2, ), dtype=object)
P1 = empty(2, dtype=object)
P1[0] = matrix('0.5 0.5; 0.8 0.2')
P1[1] = matrix('0 1; 0.1 0.9')
R1 = matrix('7.5 2; -0.4 3.9')
......@@ -317,15 +325,37 @@ def test_QLearning_exampleForest():
# RelativeValueIteration
def test_RelativeValueIteration_dense():
a = RelativeValueIteration(P, R)
p = matrix('1 0')
ar = 3.88523524641183
itr = 29
a.iterate()
assert (array(a.policy) == p).all()
assert a.iter == itr
assert absolute(a.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_sparse():
a = RelativeValueIteration(Ps, R)
p = matrix('1 0')
ar = 3.88523524641183
itr = 29
a.iterate()
assert (array(a.policy) == p).all()
assert a.iter == itr
assert absolute(a.average_reward - ar) < SMALLNUM
def test_RelativeValueIteration_exampleForest():
a = RelativeValueIteration(Pf, Rf)
itr = 4
p = matrix('0 0 0')
v = matrix('-4.360000000000000 -0.760000000000000 3.240000000000000')
#v = matrix('-4.360000000000000 -0.760000000000000 3.240000000000000')
ar = 2.43000000000000
a.iterate()
assert (array(a.policy) == p).all()
assert a.iter == itr
assert (absolute(array(a.V) - v) < SMALLNUM).all()
#assert (absolute(array(a.V) - v) < SMALLNUM).all()
assert absolute(a.average_reward - ar) < SMALLNUM
# ValueIteration
......