suragnair · csosapezo · Jun 25, 2020 · Jun 26, 2020 · Jun 26, 2020 · Jun 26, 2020
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+*.h5 filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,5 @@ checkpoints/
 # For PyCharm users
 .idea/
 
+# For Visual Studio Code users
+.vscode/
diff --git a/Coach.py b/Coach.py
@@ -88,15 +88,15 @@ def learn(self):
                     self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                     iterationTrainExamples += self.executeEpisode()
 
-                # save the iteration examples to the history 
+                # save the iteration examples to the history
                 self.trainExamplesHistory.append(iterationTrainExamples)
 
             if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                 log.warning(
                     f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
                 self.trainExamplesHistory.pop(0)
             # backup history to a file
-            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
+            # NB! the examples were collected using the model from the previous iteration, so (i-1)
             self.saveTrainExamples(i - 1)
 
             # shuffle examples before training

diff --git a/main.py b/main.py
@@ -54,4 +54,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/test_all_games.py b/test_all_games.py
@@ -43,6 +43,9 @@
 from gobang.keras.NNet import NNetWrapper as GobangKerasNNet
 from gobang.tensorflow.NNet import NNetWrapper as GobangTensorflowNNet
 
+from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame
+from ultimate_tictactoe.keras.NNet import NNetWrapper as UltimateTicTacToeKerasNNet
+
 import numpy as np
 from utils import *
 
@@ -80,6 +83,9 @@ def test_gobang_keras(self):
     def test_gobang_tensorflow(self):
         self.execute_game_test(GobangGame(), GobangTensorflowNNet)
 
+    def test_ultimate_tic_tac_toe_keras(self):
+        """Run the shared game test on Ultimate TicTacToe with its Keras network wrapper."""
+        game = UltimateTicTacToeGame()
+        self.execute_game_test(game, UltimateTicTacToeKerasNNet)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/ultimate_tictactoe/README.md b/ultimate_tictactoe/README.md
@@ -0,0 +1,41 @@
+# Ultimate TicTacToe implementation for Alpha Zero General
+*Carlos Sosa, Eduardo Cuya, Ivonne Heredia, David Aguilar 2020*
+
+This is part of an undergraduate course final project in which different reinforcement learning algorithms are tested on Ultimate TicTacToe.
+This part includes an implementation of that game for the Alpha Zero General wrapper created by Surag Nair in [suragnair/alpha-zero-general](https://github.com/suragnair/alpha-zero-general).
+
+## Game Description
+
+An Ultimate TicTacToe board consists of a 9x9 grid that represents a global TicTacToe board in which each of the nine cells is itself a 3x3 local TicTacToe board.
+
+
+After the first move, each move in a game is restricted to the local board that corresponds to the spot in which the last move was made. For example, if X plays in the bottom right space of their local board, O can only choose for their next move a square on the bottom right local board, unless that local board was already won by someone or ended in a draw. In that case, O can play on any free local board.
+
+If a player wins a local board, it is considered as making a move on that position on the global board. The game ends when a player wins the global board or there are no moves left.
+
+## State Representation
+
+During a game, the board is represented by an object that has three attributes:
+
+- **pieces:** a (9,9) Numpy array that represents the board state
+- **win_status:** a (3,3) Numpy array that represents the global board
+- **last_move:** a tuple containing the position of the last move in the game
+
+## Implementation
+
+The Game, Logic and Players implementations are based on the TicTacToe implementation by Evgeny Tyurin. UltimateTicTacToeNNet is based on the Keras implementation of OthelloNNet by Shantanu Kumar.
+
+## Test Scripts
+
+To train a model for Ultimate Tic Tac Toe:
+````bash
+python ultimate_tictactoe/utt_main.py
+````
+To test a model against a random player or a human player:
+````bash
+python ultimate_tictactoe/utt_pit.py
+````
+
+## Experiments 
+
+We trained a Keras model for Ultimate TicTacToe (15 iterations, 100 episodes, 20 epochs per iteration and 25 MCTS simulations per turn) for about 50 hours on an AMD Radeon Pro 560 4GB with OpenCL and PlaidML.
diff --git a/ultimate_tictactoe/UltimateTicTacToeGame.py b/ultimate_tictactoe/UltimateTicTacToeGame.py
@@ -0,0 +1,105 @@
+import sys
+
+import numpy as np
+
+from Game import Game
+from ultimate_tictactoe.UltimateTicTacToeLogic import Board
+
+sys.path.append('..')
+
+
+class UltimateTicTacToeGame(Game):
+    """Adapter exposing Ultimate TicTacToe through the ``Game`` interface.
+
+    Boards are ``Board`` objects from ``UltimateTicTacToeLogic``. Actions are
+    integers in ``[0, N * N]``: ``N * x + y`` selects cell ``(x, y)`` and the
+    last index, ``N * N``, is the pass action.
+    """
+
+    def __init__(self, n=3):
+        """Store the local-board side ``n`` and the full-board side ``N = n ** 2``."""
+        super().__init__()
+        self.n = n
+        self.N = n ** 2
+
+    def getInitBoard(self):
+        """Return a freshly constructed ``Board(self.n)``."""
+        return Board(self.n)
+
+    def getBoardSize(self):
+        """Return the full board dimensions as the tuple ``(N, N)``."""
+        return self.N, self.N
+
+    def getActionSize(self):
+        """Return the number of actions: one per cell plus one pass action."""
+        return (self.N ** 2) + 1
+
+    def getNextState(self, board, player, action):
+        """Apply ``action`` for ``player`` and return ``(next_board, -player)``.
+
+        The pass action (``N * N``) returns the input board itself, unchanged.
+        Any other action is executed on a copy, so ``board`` is not modified
+        by this method.
+        """
+        if action == self.N ** 2:
+            return board, -player
+
+        b = Board(self.n)
+        b.copy(board)
+
+        # Integer row/column decoding; avoids the float round-trip of
+        # ``int(action / N)`` and yields plain Python ints.
+        move = divmod(int(action), self.N)
+        b.execute_move(move, player)
+
+        return b, -player
+
+    def getValidMoves(self, board, player):
+        """Return a 0/1 NumPy vector of length ``getActionSize()``.
+
+        Entry ``N * x + y`` is 1 for every ``(x, y)`` reported by
+        ``Board.get_legal_moves()``. When there are no legal moves, only the
+        final (pass) entry is set. ``player`` is not used.
+        """
+        valid_move = [0] * self.getActionSize()
+
+        b = Board(self.n)
+        b.copy(board)
+
+        legal_moves = b.get_legal_moves()
+
+        if len(legal_moves) == 0:
+            # Nothing playable: passing is the only valid action.
+            valid_move[-1] = 1
+        else:
+            for x, y in legal_moves:
+                valid_move[self.N * x + y] = 1
+
+        return np.array(valid_move)
+
+    def getGameEnded(self, board, player):
+        """Return the game result from ``player``'s point of view.
+
+        Returns 1 if ``player`` has won, -1 if the opponent has won, 0 if
+        legal moves remain, and the small non-zero value 1e-4 otherwise
+        (no winner and no moves left).
+        """
+        b = Board(self.n)
+        b.copy(board)
+
+        if b.is_win(player):
+            return 1
+        if b.is_win(-player):
+            return -1
+        if b.has_legal_moves():
+            return 0
+
+        return 1e-4
+
+    def getCanonicalForm(self, board, player):
+        """Return a copy of ``board`` after calling ``get_canonical_form(player)`` on it."""
+        b = Board(self.n)
+        b.copy(board)
+        b.get_canonical_form(player)
+        return b
+
+    def getSymmetries(self, board, pi):
+        """Return the rotated/mirrored ``(board, pi)`` pairs for training.
+
+        ``pi`` must have ``N * N + 1`` entries; the last (pass) entry is kept
+        as-is while the rest is reshaped to ``(N, N)`` and transformed with
+        the same rotation/flip as the board. Eight pairs are produced: four
+        rotations, each with and without a left-right flip.
+        """
+        assert (len(pi) == self.N ** 2 + 1)
+        pi_board = np.reshape(pi[:-1], (self.N, self.N))
+        symmetry_list = []
+
+        for i in range(1, 5):
+            for j in [True, False]:
+                new_b = board.rot90(i, copy=True)
+                new_pi = np.rot90(pi_board, i)
+                if j:
+                    # Flip the *rotated* board so it stays aligned with the
+                    # rotated-then-flipped policy. Flipping the original
+                    # ``board`` here would pair a merely mirrored board with
+                    # a rotated and mirrored policy.
+                    # NOTE(review): assumes fliplr(copy=True) returns a
+                    # flipped copy, as the use of its return value implies.
+                    new_b = new_b.fliplr(copy=True)
+                    new_pi = np.fliplr(new_pi)
+                symmetry_list += [(new_b, list(new_pi.ravel()) + [pi[-1]])]
+        return symmetry_list
+
+    def stringRepresentation(self, board):
+        """Return ``board.tostring()``, used as the key identifying a board state."""
+        return board.tostring()
+
+    @staticmethod
+    def display(board):
+        """Print ``board.pieces`` as a text grid.
+
+        -1 is shown as ``X``, 1 as ``O`` and 0 as ``.``. Separators are drawn
+        after row/column indices 2 and 5, i.e. the layout is hard-coded for
+        the 9x9 board.
+        """
+        value = {-1: "X", 1: 'O', 0: '.'}
+        board_pieces = board.pieces
+        for row in range(len(board_pieces)):
+            last_col = len(board_pieces[0]) - 1
+            for item in range(last_col + 1):
+                symbol = value[board_pieces[row][item]]
+                if item == last_col:
+                    # Last cell of the row ends the line.
+                    print(symbol)
+                else:
+                    print(symbol, end=' ')
+                if item in [2, 5]:
+                    print('|', end=' ')
+            if row in [2, 5]:
+                print('------+-------+------')
Original file line number	Diff line number	Diff line change
Expand Up		@@ -54,4 +54,4 @@ def main():


		if __name__ == "__main__":
		main()
		main()