tictactoe.py 5.21 KB
 Steven Cordwell committed Feb 14, 2013 1 2 ``````# -*- coding: utf-8 -*- `````` Steven Cordwell committed Mar 13, 2014 3 ``````import numpy as np `````` 4 ``````from scipy.sparse import dok_matrix `````` Steven Cordwell committed Feb 14, 2013 5 `````` `````` Steven Cordwell committed Mar 13, 2014 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 ``````from mdptoolbox import mdp ACTIONS = 9 STATES = 3**ACTIONS PLAYER = 1 OPPONENT = 2 WINS = ([1, 1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 1, 1], [1, 0, 0, 1, 0, 0, 1, 0, 0], [0, 1, 0, 0, 1, 0, 0, 1, 0], [0, 0, 1, 0, 0, 1, 0, 0, 1], [1, 0, 0, 0, 1, 0, 0, 0, 1], [0, 0, 1, 0, 1, 0, 1, 0, 0]) # The valid number of cells belonging to either the player or the opponent: # (player, opponent) OWNED_CELLS = ((0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 1), (1, 2), (2, 3), (3, 4)) def convertIndexToTuple(state): """""" return(tuple(int(x) for x in np.base_repr(state, 3, 9)[-9::])) def convertTupleToIndex(state): """""" return(int("".join(str(x) for x in state), 3)) def getLegalActions(state): """""" return(tuple(x for x in range(ACTIONS) if state[x] == 0)) def getTransitionAndRewardArrays(): """""" `````` 47 48 49 `````` P = [dok_matrix((STATES, STATES)) for a in range(ACTIONS)] #R = spdok((STATES, ACTIONS)) R = np.zeros((STATES, ACTIONS)) `````` Steven Cordwell committed Mar 13, 2014 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 `````` # Naive approach, iterate through all possible combinations for a in range(ACTIONS): for s in range(STATES): state = convertIndexToTuple(s) if not isValid(state): # There are no defined moves from an invalid state, so # transition probabilities cannot be calculated. However, # P must be a square stochastic matrix, so assign a # probability of one to the invalid state transitioning # back to itself. P[a][s, s] = 1 # Reward is 0 else: s1, p, r = getTransitionProbabilities(state, a) P[a][s, s1] = p R[s, a] = r P[a] = P[a].tocsr() `````` 67 `````` #R = R.tolil() `````` Steven Cordwell committed Mar 13, 2014 68 69 70 71 72 73 74 75 76 77 `````` return(P, R) def getTransitionProbabilities(state, action): """ Parameters ---------- state : tuple The state action : int The action `````` Steven Cordwell committed Feb 14, 2013 78 `````` `````` Steven Cordwell committed Mar 13, 2014 79 80 81 82 `````` Returns ------- s1, p, r : tuple of two lists and an int s1 are the next states, p are the probabilities, and r is the reward `````` Steven Cordwell committed Feb 14, 2013 83 `````` `````` Steven Cordwell committed Mar 13, 2014 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 `````` """ #assert isValid(state) assert 0 <= action < ACTIONS if not isLegal(state, action): # If the action is illegal, then transition back to the same state but # incur a high negative reward s1 = [convertTupleToIndex(state)] return(s1, [1], -10) # Update the state with the action state = list(state) state[action] = PLAYER if isWon(state, PLAYER): # If the player's action is a winning move then transition to the # winning state and receive a reward of 1. s1 = [convertTupleToIndex(state)] return(s1, [1], 1) elif isDraw(state): s1 = [convertTupleToIndex(state)] return(s1, [1], 0) # Now we search through the opponents moves, and calculate transition # probabilities based on maximising the opponents chance of winning.. s1 = [] p = [] legal_a = getLegalActions(state) for a in legal_a: state[a] = OPPONENT # If the opponent is going to win, we assume that the winning move will # be chosen: if isWon(state, OPPONENT): s1 = [convertTupleToIndex(state)] return(s1, [1], -1) elif isDraw(state): s1 = [convertTupleToIndex(state)] return(s1, [1], 0) # Otherwise we assume the opponent will select a move with uniform # probability across potential moves: s1.append(convertTupleToIndex(state)) p.append(1.0 / len(legal_a)) state[a] = 0 # During non-terminal play states the reward is 0. return(s1, p, 0) def getReward(state, action): """""" if not isLegal(state, action): return -100 state = list(state) state[action] = PLAYER if isWon(state, PLAYER): return 1 elif isWon(state, OPPONENT): return -1 `````` Steven Cordwell committed Feb 14, 2013 136 `````` else: `````` Steven Cordwell committed Mar 13, 2014 137 `````` return 0 `````` Steven Cordwell committed Feb 14, 2013 138 `````` `````` Steven Cordwell committed Mar 13, 2014 139 140 141 142 143 144 145 ``````def isDraw(state): """""" try: state.index(0) return False except ValueError: return True `````` Steven Cordwell committed Feb 14, 2013 146 `````` `````` Steven Cordwell committed Mar 13, 2014 147 ``````def isLegal(state, action): `````` Steven Cordwell committed Feb 14, 2013 148 `````` """""" `````` Steven Cordwell committed Mar 13, 2014 149 150 151 `````` if state[action] == 0: return True else: `````` Steven Cordwell committed Feb 14, 2013 152 `````` return False `````` Steven Cordwell committed Mar 13, 2014 153 154 155 `````` def isWon(state, who): """Test if a tic-tac-toe game has been won. `````` Steven Cordwell committed Feb 14, 2013 156 `````` `````` Steven Cordwell committed Mar 13, 2014 157 158 `````` Assumes that the board is in a legal state. Will test if the value 1 is in any winning combination. `````` Steven Cordwell committed Feb 14, 2013 159 `````` `````` Steven Cordwell committed Mar 13, 2014 160 161 162 163 164 165 `````` """ for w in WINS: S = sum(1 if (w[k] == state[k] == who) else 0 for k in range(ACTIONS)) if S == 3: # We have a win `````` Steven Cordwell committed Feb 14, 2013 166 `````` return True `````` Steven Cordwell committed Mar 13, 2014 167 168 169 170 171 172 173 174 175 176 177 178 179 `````` # There were no wins so return False return False def isValid(state): """""" # S1 is the sum of the player's cells S1 = sum(1 if x == PLAYER else 0 for x in state) # S2 is the sum of the opponent's cells S2 = sum(1 if x == OPPONENT else 0 for x in state) if (S1, S2) in OWNED_CELLS: return True else: return False `````` Steven Cordwell committed Feb 14, 2013 180 181 `````` if __name__ == "__main__": `````` Steven Cordwell committed Mar 13, 2014 182 183 184 `````` P, R = getTransitionAndRewardArrays() ttt = mdp.ValueIteration(P, R, 1) print(ttt.policy)``````