Commit 3d20ccb4 authored by Steven Cordwell's avatar Steven Cordwell

[doctests] Make doctests more robust

Use ellipsis so that Python 2 and Python 3 outputs will pass. Check that
floating point numbers are within 1e-12 of the expected answer rather
than exact matches.
parent 0970cc4a
...@@ -2,4 +2,4 @@ ...@@ -2,4 +2,4 @@
# Is there any difference to using ``python setup.py nosetests``? # Is there any difference to using ``python setup.py nosetests``?
nosetests --with-coverage --cover-package=mdptoolbox --with-doctest \ nosetests --with-coverage --cover-package=mdptoolbox --with-doctest \
--doctest-options='+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL' --doctest-options='+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL'
...@@ -140,10 +140,10 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): ...@@ -140,10 +140,10 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
>>> len(Psp) >>> len(Psp)
2 2
>>> Psp[0] >>> Psp[0]
<3x3 sparse matrix of type '<type 'numpy.float64'>' <3x3 sparse matrix of type '<... 'numpy.float64'>'
with 6 stored elements in Compressed Sparse Row format> with 6 stored elements in Compressed Sparse Row format>
>>> Psp[1] >>> Psp[1]
<3x3 sparse matrix of type '<type 'numpy.int64'>' <3x3 sparse matrix of type '<... 'numpy.int64'>'
with 3 stored elements in Compressed Sparse Row format> with 3 stored elements in Compressed Sparse Row format>
>>> Rsp >>> Rsp
array([[ 0., 0.], array([[ 0., 0.],
...@@ -252,10 +252,10 @@ def rand(S, A, is_sparse=False, mask=None): ...@@ -252,10 +252,10 @@ def rand(S, A, is_sparse=False, mask=None):
>>> len(Psp), len(Rsp) >>> len(Psp), len(Rsp)
(5, 5) (5, 5)
>>> Psp[0] >>> Psp[0]
<100x100 sparse matrix of type '<type 'numpy.float64'>' <100x100 sparse matrix of type '<... 'numpy.float64'>'
with 3296 stored elements in Compressed Sparse Row format> with 3296 stored elements in Compressed Sparse Row format>
>>> Rsp[0] >>> Rsp[0]
<100x100 sparse matrix of type '<type 'numpy.float64'>' <100x100 sparse matrix of type '<... 'numpy.float64'>'
with 3296 stored elements in Compressed Sparse Row format> with 3296 stored elements in Compressed Sparse Row format>
>>> # The number of non-zero elements (nnz) in P and R are equal >>> # The number of non-zero elements (nnz) in P and R are equal
>>> Psp[1].nnz == Rsp[1].nnz >>> Psp[1].nnz == Rsp[1].nnz
......
...@@ -546,8 +546,9 @@ class PolicyIteration(MDP): ...@@ -546,8 +546,9 @@ class PolicyIteration(MDP):
>>> P, R = mdptoolbox.example.forest() >>> P, R = mdptoolbox.example.forest()
>>> pi = mdptoolbox.mdp.PolicyIteration(P, R, 0.9) >>> pi = mdptoolbox.mdp.PolicyIteration(P, R, 0.9)
>>> pi.run() >>> pi.run()
>>> pi.V >>> expected = (26.244000000000014, 29.484000000000016, 33.484000000000016)
(26.244000000000014, 29.484000000000016, 33.484000000000016) >>> all(expected[k] - pi.V[k] < 1e-12 for k in range(len(expected)))
True
>>> pi.policy >>> pi.policy
(0, 0, 0) (0, 0, 0)
""" """
...@@ -820,8 +821,9 @@ class PolicyIterationModified(PolicyIteration): ...@@ -820,8 +821,9 @@ class PolicyIterationModified(PolicyIteration):
>>> pim.run() >>> pim.run()
>>> pim.policy >>> pim.policy
(0, 0, 0) (0, 0, 0)
>>> pim.V >>> expected = (21.81408652334702, 25.054086523347017, 29.054086523347017)
(21.81408652334702, 25.054086523347017, 29.054086523347017) >>> all(expected[k] - pim.V[k] < 1e-12 for k in range(len(expected)))
True
""" """
...@@ -942,8 +944,9 @@ class QLearning(MDP): ...@@ -942,8 +944,9 @@ class QLearning(MDP):
array([[ 11.198909 , 10.34652034], array([[ 11.198909 , 10.34652034],
[ 10.74229967, 11.74105792], [ 10.74229967, 11.74105792],
[ 2.86980001, 12.25973286]]) [ 2.86980001, 12.25973286]])
>>> ql.V >>> expected = (11.198908998901134, 11.741057920409865, 12.259732864170232)
(11.198908998901134, 11.741057920409865, 12.259732864170232) >>> all(expected[k] - ql.V[k] < 1e-12 for k in range(len(expected)))
True
>>> ql.policy >>> ql.policy
(0, 1, 1) (0, 1, 1)
...@@ -957,8 +960,9 @@ class QLearning(MDP): ...@@ -957,8 +960,9 @@ class QLearning(MDP):
>>> ql.Q >>> ql.Q
array([[ 33.33010866, 40.82109565], array([[ 33.33010866, 40.82109565],
[ 34.37431041, 29.67236845]]) [ 34.37431041, 29.67236845]])
>>> ql.V >>> expected = (40.82109564847122, 34.37431040682546)
(40.82109564847122, 34.37431040682546) >>> all(expected[k] - ql.V[k] < 1e-12 for k in range(len(expected)))
True
>>> ql.policy >>> ql.policy
(1, 0) (1, 0)
...@@ -1110,8 +1114,9 @@ class RelativeValueIteration(MDP): ...@@ -1110,8 +1114,9 @@ class RelativeValueIteration(MDP):
>>> R = np.array([[5, 10], [-1, 2]]) >>> R = np.array([[5, 10], [-1, 2]])
>>> rvi = mdptoolbox.mdp.RelativeValueIteration(P, R) >>> rvi = mdptoolbox.mdp.RelativeValueIteration(P, R)
>>> rvi.run() >>> rvi.run()
>>> rvi.V >>> expected = (10.0, 3.885235246411831)
(10.0, 3.885235246411831) >>> all(expected[k] - rvi.V[k] < 1e-12 for k in range(len(expected)))
True
>>> rvi.average_reward >>> rvi.average_reward
3.8852352464118312 3.8852352464118312
>>> rvi.policy >>> rvi.policy
...@@ -1252,8 +1257,9 @@ class ValueIteration(MDP): ...@@ -1252,8 +1257,9 @@ class ValueIteration(MDP):
>>> vi.verbose >>> vi.verbose
False False
>>> vi.run() >>> vi.run()
>>> vi.V >>> expected = (5.93215488, 9.38815488, 13.38815488)
(5.93215488, 9.38815488, 13.38815488) >>> all(expected[k] - vi.V[k] < 1e-12 for k in range(len(expected)))
True
>>> vi.policy >>> vi.policy
(0, 0, 0) (0, 0, 0)
>>> vi.iter >>> vi.iter
...@@ -1264,38 +1270,10 @@ class ValueIteration(MDP): ...@@ -1264,38 +1270,10 @@ class ValueIteration(MDP):
>>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]]) >>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
>>> R = np.array([[5, 10], [-1, 2]]) >>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9) >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9)
>>> vi.setVerbose()
>>> vi.run() >>> vi.run()
Iteration V-variation >>> expected = (40.048625392716815, 33.65371175967546)
1 8.0 >>> all(expected[k] - vi.V[k] < 1e-12 for k in range(len(expected)))
2 2.76 True
3 1.9872
4 1.430784
5 1.03016448
6 0.7417184256
7 0.534037266432
8 0.384506831831
9 0.276844918918
10 0.199328341621
11 0.143516405967
12 0.103331812296
13 0.0743989048534
14 0.0535672114945
15 0.038568392276
16 0.0277692424387
17 0.0199938545559
18 0.0143955752802
19 0.0103648142018
20 0.00746266622526
21 0.00537311968218
22 0.00386864617116
23 0.00278542524322
24 0.00200550617512
25 0.00144396444609
26 0.0010396544012
Iterating stopped, epsilon-optimal policy found.
>>> vi.V
(40.048625392716815, 33.65371175967546)
>>> vi.policy >>> vi.policy
(1, 0) (1, 0)
>>> vi.iter >>> vi.iter
...@@ -1310,8 +1288,9 @@ class ValueIteration(MDP): ...@@ -1310,8 +1288,9 @@ class ValueIteration(MDP):
>>> R = np.array([[5, 10], [-1, 2]]) >>> R = np.array([[5, 10], [-1, 2]])
>>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9) >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9)
>>> vi.run() >>> vi.run()
>>> vi.V >>> expected = (40.048625392716815, 33.65371175967546)
(40.048625392716815, 33.65371175967546) >>> all(expected[k] - vi.V[k] < 1e-12 for k in range(len(expected)))
True
>>> vi.policy >>> vi.policy
(1, 0) (1, 0)
...@@ -1469,12 +1448,13 @@ class ValueIterationGS(ValueIteration): ...@@ -1469,12 +1448,13 @@ class ValueIterationGS(ValueIteration):
Examples Examples
-------- --------
>>> import mdptoolbox, mdptoolbox.example >>> import mdptoolbox.example, numpy as np
>>> P, R = mdptoolbox.example.forest() >>> P, R = mdptoolbox.example.forest()
>>> vigs = mdptoolbox.mdp.ValueIterationGS(P, R, 0.9) >>> vigs = mdptoolbox.mdp.ValueIterationGS(P, R, 0.9)
>>> vigs.run() >>> vigs.run()
>>> vigs.V >>> expected = (25.5833879767579, 28.830654635546928, 32.83065463554693)
(25.5833879767579, 28.830654635546928, 32.83065463554693) >>> all(expected[k] - vigs.V[k] < 1e-12 for k in range(len(expected)))
True
>>> vigs.policy >>> vigs.policy
(0, 0, 0) (0, 0, 0)
......
...@@ -273,6 +273,10 @@ def check(P, R): ...@@ -273,6 +273,10 @@ def check(P, R):
# what the reward array is reporting agree as to the number of actions # what the reward array is reporting agree as to the number of actions
# and states. If not then fail explaining the situation # and states. If not then fail explaining the situation
def rowsSumToOne(Z, n):
return((_np.abs(Z.sum(axis=1) - _np.ones(n))).max() <=
10 * _np.spacing(_np.float64(1)))
def checkSquareStochastic(Z): def checkSquareStochastic(Z):
"""Check if Z is a square stochastic matrix. """Check if Z is a square stochastic matrix.
...@@ -298,8 +302,7 @@ def checkSquareStochastic(Z): ...@@ -298,8 +302,7 @@ def checkSquareStochastic(Z):
raise InvalidMDPError(mdperr["mat_square"]) raise InvalidMDPError(mdperr["mat_square"])
# check that the matrix is square, and that each row sums to one # check that the matrix is square, and that each row sums to one
assert s1 == s2, mdperr["mat_square"] assert s1 == s2, mdperr["mat_square"]
assert (_np.abs(Z.sum(axis=1) - _np.ones(s2))).max() <= 2*_np.spacing(1), \ assert rowsSumToOne(Z, s2), mdperr["mat_stoch"]
mdperr["mat_stoch"]
# make sure that there are no values less than zero # make sure that there are no values less than zero
try: try:
assert (Z >= 0).all(), mdperr["mat_nonneg"] assert (Z >= 0).all(), mdperr["mat_nonneg"]
......
...@@ -42,7 +42,10 @@ class TestExampleForest(object): ...@@ -42,7 +42,10 @@ class TestExampleForest(object):
assert_equal(len(P), len(self.P)) assert_equal(len(P), len(self.P))
for a in range(len(self.P)): for a in range(len(self.P)):
assert_equal(P[a].shape, self.P[a].shape) assert_equal(P[a].shape, self.P[a].shape)
assert_equal((P[a] != sp.csr_matrix(self.P[a])).nnz, 0) try:
assert_equal((P[a] != sp.csr_matrix(self.P[a])).nnz, 0)
except AttributeError:
assert_true((P[a].todense() == self.P[a]).all())
assert_true((R == self.R).all()) assert_true((R == self.R).all())
assert_equal(R.shape, self.R.shape) assert_equal(R.shape, self.R.shape)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment