Commit b475e0a0 authored by Steven Cordwell
Browse files

fixed up the transition code

parent ccdbac17
......@@ -2,12 +2,36 @@
#import mdp
def str_base(num, base, numerals='0123456789abcdefghijklmnopqrstuvwxyz'):
    """Return the integer ``num`` written in ``base`` as a string.

    Parameters
    ----------
    num : int
        The number to convert. Negative numbers get a leading ``'-'``.
    base : int
        The target base; must be between 2 and ``len(numerals)``.
    numerals : str
        The digit alphabet, ordered from the zero digit upwards.

    Raises
    ------
    ValueError
        If ``base`` is smaller than 2 or larger than ``len(numerals)``.
    """
    if base < 2 or base > len(numerals):
        raise ValueError("str_base: base must be between 2 and %i" %
                         len(numerals))
    if num == 0:
        # Use the alphabet's own zero digit so that a custom ``numerals``
        # string is honoured for zero as it is for every other value.
        return numerals[0]
    sign = '-' if num < 0 else ''
    num = abs(num)
    digits = []
    while num:
        # Peel off the least significant digit on each pass.
        num, remainder = divmod(num, base)
        digits.append(numerals[remainder])
    # Digits were collected least significant first.
    return sign + ''.join(reversed(digits))
class TicTacToeMDP(object):
""""""
def __init__(self):
""""""
self.P = {}
for a in xrange(9):
self.P[a] = {}
self.R = {}
# some board states are equal, just rotations of other states
self.rotorder = []
......@@ -15,54 +39,69 @@ class TicTacToeMDP(object):
self.rotorder.append([6, 3, 0, 7, 4, 1, 8, 5, 2])
self.rotorder.append([8, 7, 6, 5, 4, 3, 2, 1, 0])
self.rotorder.append([2, 5, 8, 1, 4, 7, 0, 3, 6])
# The valid number of cells belonging to either the player or the
# opponent: (player, opponent)
self.nXO = ((0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(0, 1),
(1, 2),
(2, 3),
(3, 4))
# The winning positions
self.wins = ([1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1],
[1, 0, 0, 1, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 1, 0, 1, 0, 1, 0, 0])
def rotate(self, state):
rotations = []
#rotations = []
identity = []
rotations.append(state)
#rotations.append(state)
identity.append(int("".join(str(x) for x in state), 3))
for k in range(3):
rotations.append(tuple([state[self.rotorder[k][kk]]
for kk in xrange(9)]))
#rotations.append(tuple(state[self.rotorder[k][kk]]
# for kk in xrange(9)))
# Convert the state from base 3 number to integer.
identity.append(int("".join(str(x) for x in rotations[k + 1]), 3))
#identity.append(int("".join(str(x) for x in rotations[k + 1]), 3))
identity.append(int("".join(str(state[self.rotorder[k][kk]])
for kk in xrange(9)), 3))
# return the rotation with the smallest identity number
idx = identity.index(min(identity))
return (identity[idx], rotations[idx])
#idx = identity.index(min(identity))
#return (identity[idx], rotations[idx])
return min(identity)
def unrotate(self, move, rotation):
rotation -= 1
# return the move
return self.rotorder[rotation][move]
def isLegal(self, state, action):
    """Return True if cell ``action`` of ``state`` is empty (equal to 0).

    Parameters
    ----------
    state : sequence of int
        The board cells.
    action : int
        Index of the cell to test.
    """
    return state[action] == 0
def isWon(state):
def isWon(self, state, who):
""""""
wins = ([1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1],
[1, 0, 0, 1, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 1, 0, 1, 0, 1, 0, 0])
# Check to see if there are any wins
for w in wins:
S = sum([1 if (w[k] == 1 and state[k] == 1) else 0
for k in xrange(9)])
for w in self.wins:
S = sum(1 if (w[k] == 1 and state[k] == who) else 0
for k in xrange(9))
if S == 3:
# We have a win
return True
# There were no wins so return False
return False
def isDraw(state):
def isDraw(self, state):
""""""
try:
state.index(0)
......@@ -72,36 +111,62 @@ class TicTacToeMDP(object):
except:
raise
def isValid(self, state):
""""""
# S1 is the sum of the player's cells
S1 = sum(1 if x == 1 else 0 for x in state)
# S2 is the sum of the opponent's cells
S2 = sum(1 if x == 2 else 0 for x in state)
if (S1, S2) in self.nXO:
return True
else:
return False
def run(self):
""""""
nXO = ((0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(0, 1),
(1, 2),
(2, 3),
(3, 4))
#
l = (0,1,2)
# Iterate through a generator of all the combinations
for s in ([a0,a1,a2,a3,a4,a5,a6,a7,a8] for a0 in l for a1 in l
for a2 in l for a3 in l for a4 in l for a5 in l
for a6 in l for a7 in l for a8 in l):
if self.isValid(s):
self.transition(s)
# Convert P and R to ijv lists
# Iterate through up to the theorectically maxmimum value of s
for s in xrange(int('222211110',3)):
pass
# return (P, R)
def transition(self, s):
def toTuple(self, state):
    """Convert a state identity number into a tuple of base 3 digits.

    Parameters
    ----------
    state : int
        A non-negative state identity number.

    Returns
    -------
    tuple of int
        The base 3 digits of ``state``, most significant first, padded on
        the left with zeros to at least nine entries.

    Raises
    ------
    ValueError
        If ``state`` is negative.
    """
    if state < 0:
        raise ValueError("toTuple: state must be non-negative")
    digits = []
    while state:
        # Peel off the least significant base 3 digit.
        state, digit = divmod(state, 3)
        digits.append(digit)
    # Pad up to the nine cells of the board; a negative count adds nothing.
    digits.extend([0] * (9 - len(digits)))
    return tuple(reversed(digits))
def transition(self, state):
""""""
idn_s = int("".join(str(x) for x in s), 3)
legal_a = [a for a in xrange(9) if s[a] == 0]
for a1 in legal_a:
s[a1] = 1
legal_m = [a for a in xrange(9) if s[a] == 0]
for m1 in legal_m:
s_new = s
s_new[m1] = 2
idn_s_new, s_new = self.rotate(s_new)
if self.P.has_key((idn_s, idn_s_new)):
raise Exception("unexpected, P already has Pr(s,s')")
else:
self.P[(idn_s, idn_s_new)] = 1 / len(legal_m)
#TODO: the state needs to be rotated before anything else is done!!!
idn_s = int("".join(str(x) for x in state), 3)
legal_a = [x for x in xrange(9) if state[x] == 0]
for a in legal_a:
s = [x for x in state]
s[a] = 1
is_won = self.isWon(s, 1)
legal_m = [x for x in xrange(9) if s[x] == 0]
for m in legal_m:
s_new = [x for x in s]
s_new[m] = 2
idn_s_new = self.rotate(s_new)
if not self.P[a].has_key((idn_s, idn_s_new)):
self.P[a][(idn_s, idn_s_new)] = len(legal_m)
if not self.R.has_key((idn_s, idn_s_new)):
if is_won:
self.R[(idn_s, idn_s_new)] = 1
elif self.isWon(s_new, 2):
self.R[(idn_s, idn_s_new)] = -1
else:
self.R[(idn_s, idn_s_new)] = 0
if __name__ == "__main__":
    # Build the transition and reward tables for the tic-tac-toe MDP.
    P, R = TicTacToeMDP().run()
    # Solving is disabled: the mdp module import at the top of the file is
    # commented out, so calling it here would raise a NameError.
    #ttt = mdp.ValueIteration(P, R, 1)
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment