Commit 9583a8ba by Steven Cordwell

Use a NumPy array for the rewards, as sparse reward matrices are not currently working in mdptoolbox.

parent 50719db4
 # -*- coding: utf-8 -*- # -*- coding: utf-8 -*- import numpy as np import numpy as np from scipy.sparse import dok_matrix as spdok from scipy.sparse import dok_matrix from mdptoolbox import mdp from mdptoolbox import mdp ... @@ -44,8 +44,9 @@ def getLegalActions(state): ... @@ -44,8 +44,9 @@ def getLegalActions(state): def getTransitionAndRewardArrays(): def getTransitionAndRewardArrays(): """""" """""" P = [spdok((STATES, STATES)) for a in range(ACTIONS)] P = [dok_matrix((STATES, STATES)) for a in range(ACTIONS)] R = spdok((STATES, ACTIONS)) #R = spdok((STATES, ACTIONS)) R = np.zeros((STATES, ACTIONS)) # Naive approach, iterate through all possible combinations # Naive approach, iterate through all possible combinations for a in range(ACTIONS): for a in range(ACTIONS): for s in range(STATES): for s in range(STATES): ... @@ -63,7 +64,7 @@ def getTransitionAndRewardArrays(): ... @@ -63,7 +64,7 @@ def getTransitionAndRewardArrays(): P[a][s, s1] = p P[a][s, s1] = p R[s, a] = r R[s, a] = r P[a] = P[a].tocsr() P[a] = P[a].tocsr() R = R.tocsc() #R = R.tolil() return(P, R) return(P, R) def getTransitionProbabilities(state, action): def getTransitionProbabilities(state, action): ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment