Commit 9583a8ba authored by Steven Cordwell
Browse files

Use a NumPy array for the rewards, as sparse rewards are not currently working in mdptoolbox

parent 50719db4
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import numpy as np import numpy as np
from scipy.sparse import dok_matrix as spdok from scipy.sparse import dok_matrix
from mdptoolbox import mdp from mdptoolbox import mdp
...@@ -44,8 +44,9 @@ def getLegalActions(state): ...@@ -44,8 +44,9 @@ def getLegalActions(state):
def getTransitionAndRewardArrays(): def getTransitionAndRewardArrays():
"""""" """"""
P = [spdok((STATES, STATES)) for a in range(ACTIONS)] P = [dok_matrix((STATES, STATES)) for a in range(ACTIONS)]
R = spdok((STATES, ACTIONS)) #R = spdok((STATES, ACTIONS))
R = np.zeros((STATES, ACTIONS))
# Naive approach, iterate through all possible combinations # Naive approach, iterate through all possible combinations
for a in range(ACTIONS): for a in range(ACTIONS):
for s in range(STATES): for s in range(STATES):
...@@ -63,7 +64,7 @@ def getTransitionAndRewardArrays(): ...@@ -63,7 +64,7 @@ def getTransitionAndRewardArrays():
P[a][s, s1] = p P[a][s, s1] = p
R[s, a] = r R[s, a] = r
P[a] = P[a].tocsr() P[a] = P[a].tocsr()
R = R.tocsc() #R = R.tolil()
return(P, R) return(P, R)
def getTransitionProbabilities(state, action): def getTransitionProbabilities(state, action):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment