tictactoe.py 5.63 KB
Newer Older
1 2
# -*- coding: utf-8 -*-

3
#import mdp
4

Steven Cordwell's avatar
Steven Cordwell committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
def str_base(num, base, numerals = '0123456789abcdefghijklmnopqrstuvwxyz'):
    if base < 2 or base > len(numerals):
        raise ValueError("str_base: base must be between 2 and %i" % 
                         len(numerals))
    
    if num == 0:
        return '0'
    
    if num < 0:
        sign = '-'
        num = -num
    else:
        sign = ''
    
    result = ''
    while num:
        result = numerals[num % (base)] + result
        num //= base
    
    return sign + result


27 28 29 30 31
class TicTacToeMDP(object):
    """"""
         
    def __init__(self):
        """"""
32
        self.P = [None] * 9
Steven Cordwell's avatar
Steven Cordwell committed
33 34
        for a in xrange(9):
            self.P[a] = {}
35 36 37 38 39 40 41
        self.R = {}
        # some board states are equal, just rotations of other states
        self.rotorder = []
        #self.rotorder.append([0, 1, 2, 3, 4, 5, 6, 7, 8])
        self.rotorder.append([6, 3, 0, 7, 4, 1, 8, 5, 2])
        self.rotorder.append([8, 7, 6, 5, 4, 3, 2, 1, 0])
        self.rotorder.append([2, 5, 8, 1, 4, 7, 0, 3, 6])
Steven Cordwell's avatar
Steven Cordwell committed
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
        # The valid number of cells belonging to either the player or the
        # opponent: (player, opponent)
        self.nXO = ((0, 0),
                    (1, 1),
                    (2, 2),
                    (3, 3),
                    (4, 4),
                    (0, 1),
                    (1, 2),
                    (2, 3),
                    (3, 4))
        # The winning positions
        self.wins = ([1, 1, 1, 0, 0, 0, 0, 0, 0],
                     [0, 0, 0, 1, 1, 1, 0, 0, 0],
                     [0, 0, 0, 0, 0, 0, 1, 1, 1],
                     [1, 0, 0, 1, 0, 0, 1, 0, 0],
                     [0, 1, 0, 0, 1, 0, 0, 1, 0],
                     [0, 0, 1, 0, 0, 1, 0, 0, 1],
                     [1, 0, 0, 0, 1, 0, 0, 0, 1],
                     [0, 0, 1, 0, 1, 0, 1, 0, 0])
62 63
    
    def rotate(self, state):
Steven Cordwell's avatar
Steven Cordwell committed
64
        #rotations = []
65
        identity = []
Steven Cordwell's avatar
Steven Cordwell committed
66
        #rotations.append(state)
67 68
        identity.append(int("".join(str(x) for x in state), 3))
        for k in range(3):
Steven Cordwell's avatar
Steven Cordwell committed
69 70
            #rotations.append(tuple(state[self.rotorder[k][kk]]
            #                          for kk in xrange(9)))
71
            # Convert the state from base 3 number to integer.
Steven Cordwell's avatar
Steven Cordwell committed
72 73 74
            #identity.append(int("".join(str(x) for x in rotations[k + 1]), 3))
            identity.append(int("".join(str(state[self.rotorder[k][kk]])
                                        for kk in xrange(9)), 3))
75
        # return the rotation with the smallest identity number
Steven Cordwell's avatar
Steven Cordwell committed
76 77 78
        #idx = identity.index(min(identity))
        #return (identity[idx], rotations[idx])
        return min(identity)
79 80 81 82 83
    
    def unrotate(self, move, rotation):
        rotation -= 1
        # return the move
        return self.rotorder[rotation][move]
84
    
Steven Cordwell's avatar
Steven Cordwell committed
85
    def isLegal(self, state, action):
86 87 88 89 90 91
        """"""
        if state[action] == 0:
            return True
        else:
            return False
    
Steven Cordwell's avatar
Steven Cordwell committed
92
    def isWon(self, state, who):
93 94
        """"""
        # Check to see if there are any wins
Steven Cordwell's avatar
Steven Cordwell committed
95 96 97
        for w in self.wins:
            S = sum(1 if (w[k] == 1 and state[k] == who) else 0
                    for k in xrange(9))
98 99 100 101 102 103
            if S == 3:
                # We have a win
                return True
        # There were no wins so return False
        return False
    
Steven Cordwell's avatar
Steven Cordwell committed
104
    def isDraw(self, state):
105 106 107 108 109 110 111 112 113
        """"""
        try:
            state.index(0)
            return False
        except ValueError:
            return True
        except:
            raise
    
Steven Cordwell's avatar
Steven Cordwell committed
114 115 116 117 118 119 120 121 122 123
    def isValid(self, state):
        """"""
        # S1 is the sum of the player's cells
        S1 = sum(1 if x == 1 else 0 for x in state)
        # S2 is the sum of the opponent's cells
        S2 = sum(1 if x == 2 else 0 for x in state)
        if (S1, S2) in self.nXO:
            return True
        else:
            return False
124 125 126 127 128 129 130 131 132
    
    def getReward(self, s):
        if self.isWon(s, 1):
            return 1
        elif self.isWon(s, 2):
            return -1
        else:
            return 0
    
133 134
    def run(self):
        """"""
Steven Cordwell's avatar
Steven Cordwell committed
135 136
        l = (0,1,2)
        # Iterate through a generator of all the combinations
137
        for s in ((a0,a1,a2,a3,a4,a5,a6,a7,a8) for a0 in l for a1 in l
Steven Cordwell's avatar
Steven Cordwell committed
138 139 140
                   for a2 in l for a3 in l for a4 in l for a5 in l
                   for a6 in l for a7 in l for a8 in l):
            if self.isValid(s):
141 142 143
                s_idn = self.rotate(s)
                if not self.R.has_key(s_idn):
                    self.R[s_idn] = self.getReward(s)
Steven Cordwell's avatar
Steven Cordwell committed
144 145 146 147
                self.transition(s)
        # Convert P and R to ijv lists
        # Iterate through up to the theorectically maxmimum value of s
        for s in xrange(int('222211110',3)):
148
            print s
149 150
        # return (P, R)
    
Steven Cordwell's avatar
Steven Cordwell committed
151 152 153 154 155 156 157
    def toTuple(self, state):
        """"""
        state = str_base(state, 3)
        state = ''.join('0' for x in range(9 - len(state))) + state
        return tuple(int(x) for x in state)
        
    def transition(self, state):
158
        """"""
Steven Cordwell's avatar
Steven Cordwell committed
159 160 161 162 163 164 165 166 167 168 169 170 171 172
        #TODO: the state needs to be rotated before anything else is done!!!
        idn_s = int("".join(str(x) for x in state), 3)
        legal_a = [x for x in xrange(9) if state[x] == 0]
        for a in legal_a:
            s = [x for x in state]
            s[a] = 1
            is_won = self.isWon(s, 1)
            legal_m = [x for x in xrange(9) if s[x] == 0]
            for m in legal_m:
                s_new = [x for x in s]
                s_new[m] = 2
                idn_s_new = self.rotate(s_new)
                if not self.P[a].has_key((idn_s, idn_s_new)):
                    self.P[a][(idn_s, idn_s_new)] = len(legal_m)
173
                    
174 175 176

if __name__ == "__main__":
    P, R = TicTacToeMDP().run()
177
    #ttt = mdp.ValueIteration(P, R, 1)