Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ultimate TicTacToe game implementation #202

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a24081a
feat: add initial structure to game
csosapezo Jun 25, 2020
9ffca2a
feat: add game logic
csosapezo Jun 26, 2020
59012d0
chores: add sys path
csosapezo Jun 26, 2020
0a1afd1
Cubi TicTacToe
Cubi123 Jun 26, 2020
732be34
UT3Players
Cubi123 Jun 26, 2020
c5deedb
nnet
ivonneH123 Jun 27, 2020
02d29a8
Merge branch 'CubiBranch' of https://github.com/Cubi123/alpha-zero-ul…
ivonneH123 Jun 27, 2020
a1f8988
Update UltimateTicTacToeLogic.py
ivonneH123 Jun 27, 2020
d4200df
recursive bug
ivonneH123 Jun 27, 2020
1e2f9f4
Merge pull request #2 from Cubi123/CubiBranch
csosapezo Jun 27, 2020
377bdfa
feat: test environment
csosapezo Jun 27, 2020
5249cdc
feat: finish test environment
csosapezo Jun 30, 2020
352cd5e
fix bug that caused an inverse diagonal not being counted as a local
csosapezo Jul 2, 2020
f527234
Merge branch 'testing' into dev
csosapezo Jul 2, 2020
f443f9e
delete test
csosapezo Jul 2, 2020
2a49772
optimize imports
csosapezo Jul 2, 2020
206bae9
revert to base repository
csosapezo Jul 2, 2020
6e4f910
format nnet class file
csosapezo Jul 2, 2020
6ac789d
format uutnet file
csosapezo Jul 2, 2020
864fa84
delete temp gitignore
csosapezo Jul 2, 2020
f8f79ec
revert to initial state
csosapezo Jul 2, 2020
069cc0a
manage h5 with git large files
csosapezo Jul 2, 2020
c24f799
add README.md for UltimateTicTacToe
csosapezo Jul 2, 2020
22ddb1f
add test scripts for UltimateTicTacToe
csosapezo Jul 2, 2020
36e3ea1
update README.md
csosapezo Jul 2, 2020
321f632
delete .vscode directory
csosapezo Jul 2, 2020
16fcc52
update .gitignore for VSC files
csosapezo Jul 2, 2020
9a039a9
restore logging
csosapezo Jul 2, 2020
6c2c0e1
revert change on wrong file
csosapezo Jul 2, 2020
df1d83b
delete requirements.txt
csosapezo Jul 2, 2020
833e63b
add UltimateTicTacToe test (test successful)
csosapezo Jul 2, 2020
9724567
add UltimateTicTacToe test (test successful)
csosapezo Jul 2, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.h5 filter=lfs diff=lfs merge=lfs -text
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ checkpoints/
# For PyCharm users
.idea/

# For Visual Studio Code user
.vscode/
4 changes: 2 additions & 2 deletions Coach.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,15 @@ def learn(self):
self.mcts = MCTS(self.game, self.nnet, self.args) # reset search tree
iterationTrainExamples += self.executeEpisode()

# save the iteration examples to the history
# save the iteration examples to the history
self.trainExamplesHistory.append(iterationTrainExamples)

if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
log.warning(
f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
self.trainExamplesHistory.pop(0)
# backup history to a file
# NB! the examples were collected using the model from the previous iteration, so (i-1)
# NB! the examples were collected using the model from the previous iteration, so (i-1)
self.saveTrainExamples(i - 1)

# shuffle examples before training
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ def main():


if __name__ == "__main__":
main()
main()
6 changes: 6 additions & 0 deletions test_all_games.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
from gobang.keras.NNet import NNetWrapper as GobangKerasNNet
from gobang.tensorflow.NNet import NNetWrapper as GobangTensorflowNNet

from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame
from ultimate_tictactoe.keras.NNet import NNetWrapper as UltimateTicTacToeKerasNNet

import numpy as np
from utils import *

Expand Down Expand Up @@ -80,6 +83,9 @@ def test_gobang_keras(self):
# Runs the shared execute_game_test helper on a fresh GobangGame paired with
# the TensorFlow NNetWrapper (imported above as GobangTensorflowNNet).
# NOTE(review): execute_game_test is defined outside this hunk; what it
# actually exercises is not visible here.
def test_gobang_tensorflow(self):
self.execute_game_test(GobangGame(), GobangTensorflowNNet)

# Runs the shared execute_game_test helper on a fresh UltimateTicTacToeGame
# paired with the Keras NNetWrapper (imported above as
# UltimateTicTacToeKerasNNet).
# NOTE(review): execute_game_test is defined outside this hunk; what it
# actually exercises is not visible here.
def test_ultimate_tic_tac_toe_keras(self):
self.execute_game_test(UltimateTicTacToeGame(), UltimateTicTacToeKerasNNet)


if __name__ == '__main__':
unittest.main()
41 changes: 41 additions & 0 deletions ultimate_tictactoe/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Ultimate TicTacToe implementation for Alpha Zero General
*Carlos Sosa, Eduardo Cuya, Ivonne Heredia, David Aguilar 2020*

This is part of an undergraduate course final project in which different reinforcement learning algorithms are tested on Ultimate TicTacToe.
This part includes an implementation of that game for the Alpha Zero General wrapper created by Surag Nair in [suragnair/alpha-zero-general](https://github.com/suragnair/alpha-zero-general).

## Game Description

An Ultimate TicTacToe board consists of a 9x9 grid, which represents a 3x3 TicTacToe board (the global board) whose cells are themselves 3x3 TicTacToe local boards.


After the first move, each move in a game is restricted to the local board that corresponds to the spot in which the last move was made. For example, if X plays in the bottom right space of their local board, O can only choose for their next move a square on the bottom right local board, unless that local board was already won by someone or ended as a draw. In that case, O can choose any free local board.

If a player wins a local board, it is considered as making a move on that position on the global board. The game ends when a player wins the global board or there are no moves left.

## State Representation

During a game, the board is represented by an object that has three attributes:

- **pieces:** a (9,9) Numpy array that represents the board state
- **win_status:** a (3,3) Numpy array that represents the global board
- **last_move:** a tuple containing the position of the last move in the game

## Implementation

The Game, Logic and Players implementations are based on the TicTacToe implementation by Evgeny Tyurin. UltimateTicTacToeNNet is based on the Keras implementation of OthelloNNet by Shantanu Kumar.

## Test Scripts

To train a model for Ultimate Tic Tac Toe:
````bash
python ultimate_tictactoe/utt_main.py
````
To test a model against a random player or a human player:
````bash
python ultimate_tictactoe/utt_pit.py
````

## Experiments

We trained a Keras model for Ultimate TicTacToe (15 iterations, 100 episodes, 20 epochs per iteration and 25 MCTS simulations per turn) for about 50 hours on an AMD Radeon Pro 560 4GB with OpenCL and PlaidML.
105 changes: 105 additions & 0 deletions ultimate_tictactoe/UltimateTicTacToeGame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import sys

import numpy as np

from Game import Game
from ultimate_tictactoe.UltimateTicTacToeLogic import Board

sys.path.append('..')


class UltimateTicTacToeGame(Game):
    """Alpha Zero General ``Game`` adapter for Ultimate TicTacToe.

    The playable grid is ``N x N`` with ``N = n ** 2`` (``n`` local boards per
    side, each ``n x n``). Actions are integers: cell ``(x, y)`` maps to
    ``N * x + y``, and the extra trailing index ``N ** 2`` is a "pass" action
    that is only offered when the board reports no legal moves.
    """

    def __init__(self, n=3):
        """Store the local-board side ``n`` and the full grid side ``N = n**2``."""
        super().__init__()
        self.n = n
        self.N = n ** 2

    def getInitBoard(self):
        """Return a fresh ``Board`` built for side length ``n``."""
        return Board(self.n)

    def getBoardSize(self):
        """Return the ``(rows, cols)`` dimensions of the full grid."""
        return self.N, self.N

    def getActionSize(self):
        """Return the number of actions: one per cell plus the pass action."""
        return (self.N ** 2) + 1

    def getNextState(self, board, player, action):
        """Apply ``action`` for ``player`` and return ``(next_board, -player)``.

        The pass action (index ``N ** 2``) returns the input board object
        itself, unchanged. Any other action is executed on a copy, so the
        input board is not modified by this method.
        """
        if action == self.N ** 2:
            return board, -player

        b = Board(self.n)
        b.copy(board)

        # Integer divmod instead of float division + int(): exact decoding of
        # the flat action index into (row, column).
        move = divmod(int(action), self.N)
        b.execute_move(move, player)

        return b, -player

    def getValidMoves(self, board, player):
        """Return a 0/1 NumPy vector of length ``getActionSize()``.

        Entries for the cells returned by ``Board.get_legal_moves()`` are set
        to 1. When there are no legal moves, only the trailing pass action is
        marked valid.
        """
        valid_move = [0] * self.getActionSize()

        b = Board(self.n)
        b.copy(board)

        legal_moves = b.get_legal_moves()

        if len(legal_moves) == 0:
            valid_move[-1] = 1
            return np.array(valid_move)

        for x, y in legal_moves:
            valid_move[self.N * x + y] = 1

        return np.array(valid_move)

    def getGameEnded(self, board, player):
        """Return the game result from ``player``'s point of view.

        Returns:
            1 if ``player`` has won, -1 if ``-player`` has won, 0 if the game
            is still running (legal moves remain), and the small non-zero
            value ``1e-4`` when nobody won and no legal moves remain.
        """
        b = Board(self.n)
        b.copy(board)

        if b.is_win(player):
            return 1
        if b.is_win(-player):
            return -1
        if b.has_legal_moves():
            return 0

        return 1e-4

    def getCanonicalForm(self, board, player):
        """Return a copy of ``board`` after ``get_canonical_form(player)``.

        The conversion is invoked on the copy; the input board is left as is.
        """
        b = Board(self.n)
        b.copy(board)
        b.get_canonical_form(player)
        return b

    def getSymmetries(self, board, pi):
        """Return the rotated/reflected ``(board, pi)`` training pairs.

        For each of the four quarter-turn rotations, two pairs are produced:
        the rotation followed by a left-right flip, and the rotation alone.
        The pass probability ``pi[-1]`` is carried over untouched.

        The flip is applied to the *rotated* board so that the board and the
        policy in every pair undergo exactly the same transformation.
        (Previously the flip was applied to the original board, which
        discarded the rotation and paired it with a rotated+flipped policy.)
        """
        assert (len(pi) == self.N ** 2 + 1)
        pi_board = np.reshape(pi[:-1], (self.N, self.N))
        symmetry_list = []

        for i in range(1, 5):
            for flip in (True, False):
                new_b = board.rot90(i, copy=True)
                new_pi = np.rot90(pi_board, i)
                if flip:
                    # Flip the already-rotated board, mirroring what is done
                    # to the policy grid on the next line.
                    new_b = new_b.fliplr(copy=True)
                    new_pi = np.fliplr(new_pi)
                symmetry_list.append((new_b, list(new_pi.ravel()) + [pi[-1]]))
        return symmetry_list

    def stringRepresentation(self, board):
        """Return ``board.tostring()``, the board's own serialised form."""
        return board.tostring()

    @staticmethod
    def display(board):
        """Print ``board.pieces`` as a text grid.

        -1 is shown as ``X``, 1 as ``O`` and 0 as ``.``. A ``|`` is printed
        after columns 2 and 5 and a dashed line after rows 2 and 5 to mark
        the local-board boundaries.
        """
        value = {-1: "X", 1: 'O', 0: '.'}
        board_pieces = board.pieces
        last_col = len(board_pieces[0]) - 1
        for row in range(len(board_pieces)):
            for item in range(len(board_pieces[0])):
                symbol = value[board_pieces[row][item]]
                if item != last_col:
                    print(symbol, end=' ')
                else:
                    # Last cell of the row: terminate the line.
                    print(symbol)
                if item in (2, 5):
                    print('|', end=' ')
            if row in (2, 5):
                print('------+-------+------')
Loading