### [doctests] Make doctests more robust

Use an ellipsis so that both Python 2 and Python 3 outputs will pass. Check that
floating-point numbers are within 1e-12 of the expected answer rather
than requiring exact matches.
parent 0970cc4a
 ... @@ -2,4 +2,4 @@ ... @@ -2,4 +2,4 @@ # Is there any difference to using ``python setup.py nosetests``? # Is there any difference to using ``python setup.py nosetests``? nosetests --with-coverage --cover-package=mdptoolbox --with-doctest \ nosetests --with-coverage --cover-package=mdptoolbox --with-doctest \ --doctest-options='+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL' --doctest-options='+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL'
 ... @@ -140,10 +140,10 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): ... @@ -140,10 +140,10 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False): >>> len(Psp) >>> len(Psp) 2 2 >>> Psp >>> Psp <3x3 sparse matrix of type '' <3x3 sparse matrix of type '<... 'numpy.float64'>' with 6 stored elements in Compressed Sparse Row format> with 6 stored elements in Compressed Sparse Row format> >>> Psp >>> Psp <3x3 sparse matrix of type '' <3x3 sparse matrix of type '<... 'numpy.int64'>' with 3 stored elements in Compressed Sparse Row format> with 3 stored elements in Compressed Sparse Row format> >>> Rsp >>> Rsp array([[ 0., 0.], array([[ 0., 0.], ... @@ -252,10 +252,10 @@ def rand(S, A, is_sparse=False, mask=None): ... @@ -252,10 +252,10 @@ def rand(S, A, is_sparse=False, mask=None): >>> len(Psp), len(Rsp) >>> len(Psp), len(Rsp) (5, 5) (5, 5) >>> Psp >>> Psp <100x100 sparse matrix of type '' <100x100 sparse matrix of type '<... 'numpy.float64'>' with 3296 stored elements in Compressed Sparse Row format> with 3296 stored elements in Compressed Sparse Row format> >>> Rsp >>> Rsp <100x100 sparse matrix of type '' <100x100 sparse matrix of type '<... 'numpy.float64'>' with 3296 stored elements in Compressed Sparse Row format> with 3296 stored elements in Compressed Sparse Row format> >>> # The number of non-zero elements (nnz) in P and R are equal >>> # The number of non-zero elements (nnz) in P and R are equal >>> Psp.nnz == Rsp.nnz >>> Psp.nnz == Rsp.nnz ... ...
 ... @@ -546,8 +546,9 @@ class PolicyIteration(MDP): ... @@ -546,8 +546,9 @@ class PolicyIteration(MDP): >>> P, R = mdptoolbox.example.forest() >>> P, R = mdptoolbox.example.forest() >>> pi = mdptoolbox.mdp.PolicyIteration(P, R, 0.9) >>> pi = mdptoolbox.mdp.PolicyIteration(P, R, 0.9) >>> pi.run() >>> pi.run() >>> pi.V >>> expected = (26.244000000000014, 29.484000000000016, 33.484000000000016) (26.244000000000014, 29.484000000000016, 33.484000000000016) >>> all(expected[k] - pi.V[k] < 1e-12 for k in range(len(expected))) True >>> pi.policy >>> pi.policy (0, 0, 0) (0, 0, 0) """ """ ... @@ -820,8 +821,9 @@ class PolicyIterationModified(PolicyIteration): ... @@ -820,8 +821,9 @@ class PolicyIterationModified(PolicyIteration): >>> pim.run() >>> pim.run() >>> pim.policy >>> pim.policy (0, 0, 0) (0, 0, 0) >>> pim.V >>> expected = (21.81408652334702, 25.054086523347017, 29.054086523347017) (21.81408652334702, 25.054086523347017, 29.054086523347017) >>> all(expected[k] - pim.V[k] < 1e-12 for k in range(len(expected))) True """ """ ... @@ -942,8 +944,9 @@ class QLearning(MDP): ... @@ -942,8 +944,9 @@ class QLearning(MDP): array([[ 11.198909 , 10.34652034], array([[ 11.198909 , 10.34652034], [ 10.74229967, 11.74105792], [ 10.74229967, 11.74105792], [ 2.86980001, 12.25973286]]) [ 2.86980001, 12.25973286]]) >>> ql.V >>> expected = (11.198908998901134, 11.741057920409865, 12.259732864170232) (11.198908998901134, 11.741057920409865, 12.259732864170232) >>> all(expected[k] - ql.V[k] < 1e-12 for k in range(len(expected))) True >>> ql.policy >>> ql.policy (0, 1, 1) (0, 1, 1) ... @@ -957,8 +960,9 @@ class QLearning(MDP): ... 
@@ -957,8 +960,9 @@ class QLearning(MDP): >>> ql.Q >>> ql.Q array([[ 33.33010866, 40.82109565], array([[ 33.33010866, 40.82109565], [ 34.37431041, 29.67236845]]) [ 34.37431041, 29.67236845]]) >>> ql.V >>> expected = (40.82109564847122, 34.37431040682546) (40.82109564847122, 34.37431040682546) >>> all(expected[k] - ql.V[k] < 1e-12 for k in range(len(expected))) True >>> ql.policy >>> ql.policy (1, 0) (1, 0) ... @@ -1110,8 +1114,9 @@ class RelativeValueIteration(MDP): ... @@ -1110,8 +1114,9 @@ class RelativeValueIteration(MDP): >>> R = np.array([[5, 10], [-1, 2]]) >>> R = np.array([[5, 10], [-1, 2]]) >>> rvi = mdptoolbox.mdp.RelativeValueIteration(P, R) >>> rvi = mdptoolbox.mdp.RelativeValueIteration(P, R) >>> rvi.run() >>> rvi.run() >>> rvi.V >>> expected = (10.0, 3.885235246411831) (10.0, 3.885235246411831) >>> all(expected[k] - rvi.V[k] < 1e-12 for k in range(len(expected))) True >>> rvi.average_reward >>> rvi.average_reward 3.8852352464118312 3.8852352464118312 >>> rvi.policy >>> rvi.policy ... @@ -1252,8 +1257,9 @@ class ValueIteration(MDP): ... @@ -1252,8 +1257,9 @@ class ValueIteration(MDP): >>> vi.verbose >>> vi.verbose False False >>> vi.run() >>> vi.run() >>> vi.V >>> expected = (5.93215488, 9.38815488, 13.38815488) (5.93215488, 9.38815488, 13.38815488) >>> all(expected[k] - vi.V[k] < 1e-12 for k in range(len(expected))) True >>> vi.policy >>> vi.policy (0, 0, 0) (0, 0, 0) >>> vi.iter >>> vi.iter ... @@ -1264,38 +1270,10 @@ class ValueIteration(MDP): ... 
@@ -1264,38 +1270,10 @@ class ValueIteration(MDP): >>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]]) >>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]]) >>> R = np.array([[5, 10], [-1, 2]]) >>> R = np.array([[5, 10], [-1, 2]]) >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9) >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9) >>> vi.setVerbose() >>> vi.run() >>> vi.run() Iteration V-variation >>> expected = (40.048625392716815, 33.65371175967546) 1 8.0 >>> all(expected[k] - vi.V[k] < 1e-12 for k in range(len(expected))) 2 2.76 True 3 1.9872 4 1.430784 5 1.03016448 6 0.7417184256 7 0.534037266432 8 0.384506831831 9 0.276844918918 10 0.199328341621 11 0.143516405967 12 0.103331812296 13 0.0743989048534 14 0.0535672114945 15 0.038568392276 16 0.0277692424387 17 0.0199938545559 18 0.0143955752802 19 0.0103648142018 20 0.00746266622526 21 0.00537311968218 22 0.00386864617116 23 0.00278542524322 24 0.00200550617512 25 0.00144396444609 26 0.0010396544012 Iterating stopped, epsilon-optimal policy found. >>> vi.V (40.048625392716815, 33.65371175967546) >>> vi.policy >>> vi.policy (1, 0) (1, 0) >>> vi.iter >>> vi.iter ... @@ -1310,8 +1288,9 @@ class ValueIteration(MDP): ... @@ -1310,8 +1288,9 @@ class ValueIteration(MDP): >>> R = np.array([[5, 10], [-1, 2]]) >>> R = np.array([[5, 10], [-1, 2]]) >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9) >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9) >>> vi.run() >>> vi.run() >>> vi.V >>> expected = (40.048625392716815, 33.65371175967546) (40.048625392716815, 33.65371175967546) >>> all(expected[k] - vi.V[k] < 1e-12 for k in range(len(expected))) True >>> vi.policy >>> vi.policy (1, 0) (1, 0) ... @@ -1469,12 +1448,13 @@ class ValueIterationGS(ValueIteration): ... 
@@ -1469,12 +1448,13 @@ class ValueIterationGS(ValueIteration): Examples Examples -------- -------- >>> import mdptoolbox, mdptoolbox.example >>> import mdptoolbox.example, numpy as np >>> P, R = mdptoolbox.example.forest() >>> P, R = mdptoolbox.example.forest() >>> vigs = mdptoolbox.mdp.ValueIterationGS(P, R, 0.9) >>> vigs = mdptoolbox.mdp.ValueIterationGS(P, R, 0.9) >>> vigs.run() >>> vigs.run() >>> vigs.V >>> expected = (25.5833879767579, 28.830654635546928, 32.83065463554693) (25.5833879767579, 28.830654635546928, 32.83065463554693) >>> all(expected[k] - vigs.V[k] < 1e-12 for k in range(len(expected))) True >>> vigs.policy >>> vigs.policy (0, 0, 0) (0, 0, 0) ... ...
 ... @@ -273,6 +273,10 @@ def check(P, R): ... @@ -273,6 +273,10 @@ def check(P, R): # what the reward array is reporting agree as to the number of actions # what the reward array is reporting agree as to the number of actions # and states. If not then fail explaining the situation # and states. If not then fail explaining the situation def rowsSumToOne(Z, n): return((_np.abs(Z.sum(axis=1) - _np.ones(n))).max() <= 10 * _np.spacing(_np.float64(1))) def checkSquareStochastic(Z): def checkSquareStochastic(Z): """Check if Z is a square stochastic matrix. """Check if Z is a square stochastic matrix. ... @@ -298,8 +302,7 @@ def checkSquareStochastic(Z): ... @@ -298,8 +302,7 @@ def checkSquareStochastic(Z): raise InvalidMDPError(mdperr["mat_square"]) raise InvalidMDPError(mdperr["mat_square"]) # check that the matrix is square, and that each row sums to one # check that the matrix is square, and that each row sums to one assert s1 == s2, mdperr["mat_square"] assert s1 == s2, mdperr["mat_square"] assert (_np.abs(Z.sum(axis=1) - _np.ones(s2))).max() <= 2*_np.spacing(1), \ assert rowsSumToOne(Z, s2), mdperr["mat_stoch"] mdperr["mat_stoch"] # make sure that there are no values less than zero # make sure that there are no values less than zero try: try: assert (Z >= 0).all(), mdperr["mat_nonneg"] assert (Z >= 0).all(), mdperr["mat_nonneg"] ... ...
 ... @@ -42,7 +42,10 @@ class TestExampleForest(object): ... @@ -42,7 +42,10 @@ class TestExampleForest(object): assert_equal(len(P), len(self.P)) assert_equal(len(P), len(self.P)) for a in range(len(self.P)): for a in range(len(self.P)): assert_equal(P[a].shape, self.P[a].shape) assert_equal(P[a].shape, self.P[a].shape) assert_equal((P[a] != sp.csr_matrix(self.P[a])).nnz, 0) try: assert_equal((P[a] != sp.csr_matrix(self.P[a])).nnz, 0) except AttributeError: assert_true((P[a].todense() == self.P[a]).all()) assert_true((R == self.R).all()) assert_true((R == self.R).all()) assert_equal(R.shape, self.R.shape) assert_equal(R.shape, self.R.shape) ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!