diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..1bccc1fa8
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.h5 filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index cd1152c6e..f3f194c8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ checkpoints/
 # For PyCharm users
 .idea/
+# For Visual Studio Code users
+.vscode/
\ No newline at end of file
diff --git a/Coach.py b/Coach.py
index 9d228a07b..e8599697f 100644
--- a/Coach.py
+++ b/Coach.py
@@ -88,7 +88,7 @@ def learn(self):
                 self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                 iterationTrainExamples += self.executeEpisode()
 
-            # save the iteration examples to the history 
+            # save the iteration examples to the history
             self.trainExamplesHistory.append(iterationTrainExamples)
 
             if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
@@ -96,7 +96,7 @@
                     f"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}")
                 self.trainExamplesHistory.pop(0)
             # backup history to a file
-            # NB! the examples were collected using the model from the previous iteration, so (i-1) 
+            # NB! the examples were collected using the model from the previous iteration, so (i-1)
             self.saveTrainExamples(i - 1)
 
             # shuffle examples before training
diff --git a/main.py b/main.py
index 5687c2a38..fa12ed2f6 100644
--- a/main.py
+++ b/main.py
@@ -54,4 +54,4 @@ def main():
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/test_all_games.py b/test_all_games.py
index 9d03dc43e..28bac22ba 100644
--- a/test_all_games.py
+++ b/test_all_games.py
@@ -43,6 +43,9 @@
 from gobang.keras.NNet import NNetWrapper as GobangKerasNNet
 from gobang.tensorflow.NNet import NNetWrapper as GobangTensorflowNNet
 
+from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame
+from ultimate_tictactoe.keras.NNet import NNetWrapper as UltimateTicTacToeKerasNNet
+
 import numpy as np
 
 from utils import *
@@ -80,6 +83,9 @@ def test_gobang_keras(self):
     def test_gobang_tensorflow(self):
         self.execute_game_test(GobangGame(), GobangTensorflowNNet)
 
+    def test_ultimate_tic_tac_toe_keras(self):
+        self.execute_game_test(UltimateTicTacToeGame(), UltimateTicTacToeKerasNNet)
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/ultimate_tictactoe/README.md b/ultimate_tictactoe/README.md
new file mode 100644
index 000000000..9441145af
--- /dev/null
+++ b/ultimate_tictactoe/README.md
@@ -0,0 +1,41 @@
+# Ultimate TicTacToe implementation for Alpha Zero General
+*Carlos Sosa, Eduardo Cuya, Ivonne Heredia, David Aguilar 2020*
+
+This is part of an undergraduate course final project in which different reinforcement learning algorithms are tested on Ultimate TicTacToe. This part implements the game on top of the Alpha Zero General framework created by Surag Nair in [suragnair/alpha-zero-general](https://github.com/suragnair/alpha-zero-general).
+
+## Game Description
+
+An Ultimate TicTacToe board is a 9x9 grid: a global TicTacToe board in which each of the nine cells is itself a 3x3 local TicTacToe board.
+
+After the first move, each move is restricted to the local board that corresponds to the cell in which the last move was made. For example, if X plays the bottom-right cell of a local board, O must play somewhere on the bottom-right local board, unless that local board has already been won or has ended in a draw. In that case, O can choose any free cell on any open local board.
+
+Winning a local board counts as placing a mark on the corresponding cell of the global board. The game ends when a player wins the global board or no moves remain.
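The forced-board rule above is the heart of the game logic. As a minimal sketch (mirroring `Board.get_legal_area` in `UltimateTicTacToeLogic.py` below), the cell's position *within* its local board selects the next local board:

```python
def forced_local_board(last_move, n=3):
    """Return the (row, col) of the local board forced by the last move,
    or None if any board may be played (first move of the game)."""
    if last_move is None:
        return None
    x, y = last_move          # global coordinates on the 9x9 grid
    return x % n, y % n       # position inside the local board = next board
```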
+## State Representation
+
+During a game, the board is represented by an object with three attributes:
+
+- **pieces:** a (9,9) Numpy array that represents the board state
+- **win_status:** a (3,3) Numpy array that represents the global board
+- **last_move:** a (row, column) tuple with the position of the last move in the game
+
+## Implementation
+
+The Game, Logic and Players implementations are based on the TicTacToe implementation by Evgeny Tyurin. UltimateTicTacToeNNet is based on the Keras implementation of OthelloNNet by Shantanu Kumar.
+
+## Test Scripts
+
+To train a model for Ultimate TicTacToe:
+````bash
+python ultimate_tictactoe/uttt_main.py
+````
+To test a model against a random player or a human player:
+````bash
+python ultimate_tictactoe/uttt_pit.py
+````
+
+## Experiments
+
+We trained a Keras model for Ultimate TicTacToe (15 iterations, 100 self-play episodes and 20 training epochs per iteration, 25 MCTS simulations per move) for about 50 hours on an AMD Radeon Pro 560 4GB with OpenCL and PlaidML.
diff --git a/ultimate_tictactoe/UltimateTicTacToeGame.py b/ultimate_tictactoe/UltimateTicTacToeGame.py
new file mode 100644
index 000000000..9508ed155
--- /dev/null
+++ b/ultimate_tictactoe/UltimateTicTacToeGame.py
@@ -0,0 +1,105 @@
+import sys
+
+sys.path.append('..')
+
+import numpy as np
+
+from Game import Game
+from ultimate_tictactoe.UltimateTicTacToeLogic import Board
+
+
+class UltimateTicTacToeGame(Game):
+    def __init__(self, n=3):
+        super().__init__()
+        self.n = n
+        self.N = n ** 2
+
+    def getInitBoard(self):
+        return Board(self.n)
+
+    def getBoardSize(self):
+        return self.N, self.N
+
+    def getActionSize(self):
+        # 81 cells plus a pass action for positions with no legal move
+        return (self.N ** 2) + 1
+
+    def getNextState(self, board, player, action):
+        if action == self.N ** 2:
+            return board, -player
+
+        b = Board(self.n)
+        b.copy(board)
+
+        move = (action // self.N, action % self.N)
+        b.execute_move(move, player)
+
+        return b, -player
+
+    def getValidMoves(self, board, player):
+        valid_move = [0] * self.getActionSize()
+
+        b = Board(self.n)
+        b.copy(board)
+
+        legal_moves = b.get_legal_moves()
+
+        if len(legal_moves) == 0:
+            valid_move[-1] = 1
+            return np.array(valid_move)
+
+        for x, y in legal_moves:
+            valid_move[self.N * x + y] = 1
+
+        return np.array(valid_move)
+
+    def getGameEnded(self, board, player):
+        b = Board(self.n)
+        b.copy(board)
+
+        if b.is_win(player):
+            return 1
+        if b.is_win(-player):
+            return -1
+        if b.has_legal_moves():
+            return 0
+
+        # draw has a very small positive value
+        return 1e-4
+
+    def getCanonicalForm(self, board, player):
+        b = Board(self.n)
+        b.copy(board)
+        b.get_canonical_form(player)
+        return b
+
+    def getSymmetries(self, board, pi):
+        assert (len(pi) == self.N ** 2 + 1)
+        pi_board = np.reshape(pi[:-1], (self.N, self.N))
+        symmetry_list = []
+
+        for i in range(1, 5):
+            for j in [True, False]:
+                new_b = board.rot90(i, copy=True)
+                new_pi = np.rot90(pi_board, i)
+                if j:
+                    # flip the rotated board, not the original one
+                    new_b = new_b.fliplr(copy=True)
+                    new_pi = np.fliplr(new_pi)
+                symmetry_list += [(new_b, list(new_pi.ravel()) + [pi[-1]])]
+        return symmetry_list
+
+    def stringRepresentation(self, board):
+        return board.tostring()
+
+    @staticmethod
+    def display(board):
+        value = {-1: 'X', 1: 'O', 0: '.'}
+        board_pieces = board.pieces
+        for row in range(len(board_pieces)):
+            for item in range(len(board_pieces[0])):
+                if item != len(board_pieces[0]) - 1:
+                    print(value[board_pieces[row][item]], end=' ')
+                else:
+                    print(value[board_pieces[row][item]])
+                if item in [2, 5]:
+                    print('|', end=' ')
+            if row in [2, 5]:
+                print('------+-------+------')
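A quick way to exercise this Game wrapper is a smoke test like the following (hypothetical usage, not part of the PR; it assumes the repo root is on `sys.path`):

```python
import numpy as np

from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame

g = UltimateTicTacToeGame()
board, player = g.getInitBoard(), 1
valids = g.getValidMoves(board, player)   # 82-entry 0/1 vector (81 cells + pass)
assert valids.sum() == 81                 # every cell is legal on the first move
action = int(np.flatnonzero(valids)[0])   # take the first legal action
board, player = g.getNextState(board, player, action)
UltimateTicTacToeGame.display(board)
```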
diff --git a/ultimate_tictactoe/UltimateTicTacToeLogic.py b/ultimate_tictactoe/UltimateTicTacToeLogic.py
new file mode 100644
index 000000000..d05987f68
--- /dev/null
+++ b/ultimate_tictactoe/UltimateTicTacToeLogic.py
@@ -0,0 +1,201 @@
+import numpy as np
+
+
+class Board:
+
+    def __init__(self, n=3):
+        self.n = n
+        self.N = n ** 2
+        self.last_move = None
+        self.pieces = np.zeros((self.N, self.N)).astype(int)
+        self.win_status = np.zeros((n, n)).astype(int)
+
+    def copy(self, other):
+        self.n = other.n
+        self.N = other.N
+        self.last_move = other.last_move
+        self.pieces = np.copy(other.pieces)
+        self.win_status = np.copy(other.win_status)
+
+    def __getitem__(self, index):
+        return self.pieces[index]
+
+    def get_legal_moves(self):
+        moves = set()
+        legal_coord = self.get_legal_area()
+
+        if legal_coord and not (self.is_locked(legal_coord[0], legal_coord[1]) or
+                                self.is_full(legal_coord[0], legal_coord[1])):
+            # the forced local board is open: only its empty cells are legal
+            for x in range(legal_coord[0] * self.n, (legal_coord[0] + 1) * self.n):
+                for y in range(legal_coord[1] * self.n, (legal_coord[1] + 1) * self.n):
+                    if self[x][y] == 0:
+                        moves.add((x, y))
+        else:
+            # first move, or the forced board is locked/full:
+            # any empty cell of any unlocked local board is legal
+            for x in range(self.N):
+                for y in range(self.N):
+                    area_coord = self.get_area(x, y)
+                    if legal_coord:
+                        if area_coord != legal_coord and not self.is_locked(area_coord[0], area_coord[1]):
+                            if self[x][y] == 0:
+                                moves.add((x, y))
+                    else:
+                        if self[x][y] == 0:
+                            moves.add((x, y))
+
+        return list(moves)
+
+    def get_area(self, x, y):
+        # local board containing cell (x, y)
+        return x // self.n, y // self.n
+
+    def get_legal_area(self):
+        # the cell's position inside its local board selects the next board
+        if not self.last_move:
+            return None
+        return self.last_move[0] % self.n, self.last_move[1] % self.n
+
+    def is_locked(self, x, y):
+        return self.win_status[x][y] != 0
+
+    def has_legal_moves(self):
+        return len(self.get_legal_moves()) != 0
+
+    def is_win(self, player):
+        # checked on the 3x3 global board (win_status)
+        win = self.n
+
+        # check columns
+        for y in range(self.n):
+            count = 0
+            for x in range(self.n):
+                if self.win_status[x][y] == player:
+                    count += 1
+            if count == win:
+                return True
+
+        # check rows
+        for x in range(self.n):
+            count = 0
+            for y in range(self.n):
+                if self.win_status[x][y] == player:
+                    count += 1
+            if count == win:
+                return True
+
+        # check the two diagonals
+        count = 0
+        for d in range(self.n):
+            if self.win_status[d][d] == player:
+                count += 1
+        if count == win:
+            return True
+
+        count = 0
+        for d in range(self.n):
+            if self.win_status[d][self.n - d - 1] == player:
+                count += 1
+        if count == win:
+            return True
+
+        return False
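For reference, the same global-board check written against numpy directly (an equivalent sketch, not the PR's implementation):

```python
import numpy as np

def is_win_vectorized(win_status, player):
    ws = (win_status == player)
    return bool(ws.all(axis=0).any()               # any complete column
                or ws.all(axis=1).any()            # any complete row
                or np.diag(ws).all()               # main diagonal
                or np.diag(np.fliplr(ws)).all())   # anti-diagonal
```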
+    def is_local_win(self, area, player):
+        # same check as is_win, but on the 3x3 local board at `area`
+        win = self.n
+
+        # check columns
+        for y in range(area[1] * self.n, (area[1] + 1) * self.n):
+            count = 0
+            for x in range(area[0] * self.n, (area[0] + 1) * self.n):
+                if self[x][y] == player:
+                    count += 1
+            if count == win:
+                return True
+
+        # check rows
+        for x in range(area[0] * self.n, (area[0] + 1) * self.n):
+            count = 0
+            for y in range(area[1] * self.n, (area[1] + 1) * self.n):
+                if self[x][y] == player:
+                    count += 1
+            if count == win:
+                return True
+
+        # check the two diagonals
+        count = 0
+        for x, y in zip(range(area[0] * self.n, (area[0] + 1) * self.n),
+                        range(area[1] * self.n, (area[1] + 1) * self.n)):
+            if self[x][y] == player:
+                count += 1
+        if count == win:
+            return True
+
+        count = 0
+        for x, y in zip(range(area[0] * self.n, (area[0] + 1) * self.n),
+                        range(area[1] * self.n, (area[1] + 1) * self.n)):
+            # mirror y inside the local board to walk the anti-diagonal
+            if self[x][area[1] * self.n + (area[1] + 1) * self.n - y - 1] == player:
+                count += 1
+        if count == win:
+            return True
+
+        return False
+
+    def execute_move(self, move, player):
+        (x, y) = move
+
+        assert self[x][y] == 0
+        self[x][y] = player
+        self.last_move = move
+
+        area_x, area_y = self.get_area(x, y)
+        if self.is_local_win((area_x, area_y), player):
+            self.win_status[area_x][area_y] = player
+
+    def get_canonical_form(self, player):
+        self.pieces = player * self.pieces
+        self.win_status = player * self.win_status
+
+    def rot90(self, i, copy=False):
+        if copy:
+            board = Board(self.n)
+            board.copy(self)
+
+            board.pieces = np.rot90(board.pieces, i)
+            board.win_status = np.rot90(board.win_status, i)
+
+            return board
+
+        self.pieces = np.rot90(self.pieces, i)
+        self.win_status = np.rot90(self.win_status, i)
+
+        return self
+
+    def fliplr(self, copy=False):
+        if copy:
+            board = Board(self.n)
+            board.copy(self)
+
+            board.pieces = np.fliplr(board.pieces)
+            board.win_status = np.fliplr(board.win_status)
+
+            return board
+
+        self.pieces = np.fliplr(self.pieces)
+        self.win_status = np.fliplr(self.win_status)
+
+        return self
+
+    def tostring(self):
+        # pieces alone do not identify a state: the forced local board
+        # depends on last_move, so include it in the MCTS hash key
+        return self.pieces.tobytes() + str(self.last_move).encode()
+
+    def is_full(self, x0, y0):
+        for y in range(y0 * self.n, (y0 + 1) * self.n):
+            for x in range(x0 * self.n, (x0 + 1) * self.n):
+                if not self[x][y]:
+                    return False
+
+        return True
diff --git a/ultimate_tictactoe/UltimateTicTacToePlayers.py b/ultimate_tictactoe/UltimateTicTacToePlayers.py
new file mode 100644
index 000000000..274b32d64
--- /dev/null
+++ b/ultimate_tictactoe/UltimateTicTacToePlayers.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+
+class RandomUltimateTictacToePlayer():
+    def __init__(self, game):
+        self.game = game
+
+    def play(self, board):
+        # rejection-sample uniformly over actions until a legal one is hit
+        a = np.random.randint(self.game.getActionSize())
+        valids = self.game.getValidMoves(board, 1)
+        while valids[a] != 1:
+            a = np.random.randint(self.game.getActionSize())
+        return a
+
+
+class HumanUltimateTicTacToePlayer():
+    def __init__(self, game):
+        self.game = game
+
+    def play(self, board):
+        valid = self.game.getValidMoves(board, 1)
+        # list the legal moves as "x y" pairs (the last index is the pass action)
+        for i in range(len(valid)):
+            if valid[i]:
+                print(i // self.game.N, i % self.game.N)
+        while True:
+            a = input()
+            x, y = [int(x) for x in a.split(' ')]
+            a = self.game.N * x + y if x != -1 else self.game.N ** 2
+            if valid[a]:
+                break
+            else:
+                print('Invalid')
+
+        return a
diff --git a/ultimate_tictactoe/__init__.py b/ultimate_tictactoe/__init__.py
new file mode 100644
index 000000000..e69de29bb
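Rejection sampling works here because at least one action is always valid, but an equivalent rejection-free draw (a sketch, not part of the PR) samples directly from the legal indices:

```python
import numpy as np

def play(self, board):
    valids = self.game.getValidMoves(board, 1)
    return int(np.random.choice(np.flatnonzero(valids)))
```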
+""" + +args = dotdict({ + 'lr': 0.001, + 'dropout': 0.5, + 'epochs': 20, + 'batch_size': 64, + 'cuda': False, + 'num_channels': 512, +}) + + +class NNetWrapper(NeuralNet): + def __init__(self, game): + self.nnet = utttnet(game, args) + self.board_x, self.board_y = game.getBoardSize() + self.action_size = game.getActionSize() + + def train(self, examples): + """ + examples: list of examples, each example is of form (board, pi, v) + """ + input_boards, target_pis, target_vs = list(zip(*examples)) + input_boards = np.asarray([board.pieces for board in input_boards]) + target_pis = np.asarray(target_pis) + target_vs = np.asarray(target_vs) + self.nnet.model.fit(x=input_boards, y=[target_pis, target_vs], batch_size=args.batch_size, epochs=args.epochs) + + def predict(self, board): + """ + board: Board class object + """ + # preparing input + board_pieces = np.array(board.pieces).reshape((1, board.N, board.N)) + + # run + pi, v = self.nnet.model.predict(board_pieces) + + return pi[0], v[0] + + def save_checkpoint(self, folder='checkpoint', filename='checkpoint.h5'): + filepath = os.path.join(folder, filename) + if not os.path.exists(folder): + print("Checkpoint Directory does not exist! Making directory {}".format(folder)) + os.mkdir(folder) + else: + print("Checkpoint Directory exists! ") + self.nnet.model.save_weights(filepath) + + def load_checkpoint(self, folder='checkpoint', filename='checkpoint.h5'): + filepath = os.path.join(folder, filename) + if not os.path.exists(filepath): + raise ("No model in path '{}'".format(filepath)) + self.nnet.model.load_weights(filepath) diff --git a/ultimate_tictactoe/keras/UltimateTicTacToeNNet.py b/ultimate_tictactoe/keras/UltimateTicTacToeNNet.py new file mode 100644 index 000000000..e3782845c --- /dev/null +++ b/ultimate_tictactoe/keras/UltimateTicTacToeNNet.py @@ -0,0 +1,47 @@ +import sys + +sys.path.append('..') + +from keras.models import * +from keras.layers import * +from keras.optimizers import * + +""" +NeuralNet for the game of Ultimate TicTacToe. + +Author: Eduardo Cuya, github.com/Cubi123 +Date: Jan 26, 2020. + +Based on the OthelloNNet by SourKream and Surag Nair. 
+""" + + +class UltimateTicTacToeNNet(): + def __init__(self, game, args): + # game params + self.board_x, self.board_y = game.getBoardSize() + self.action_size = game.getActionSize() + self.args = args + + # Neural Net + self.input_boards = Input(shape=(self.board_x, self.board_y)) # s: batch_size (N,N) + + x_image = Reshape((self.board_x, self.board_y, 1))(self.input_boards) # batch_size x board_x x board_y x 1 + h_conv1 = Activation('relu')(BatchNormalization(axis=3)( + Conv2D(args.num_channels, 3, padding='same')(x_image))) # batch_size x board_x x board_y x num_channels + h_conv2 = Activation('relu')(BatchNormalization(axis=3)( + Conv2D(args.num_channels, 3, padding='same')(h_conv1))) # batch_size x board_x x board_y x num_channels + h_conv3 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(args.num_channels, 3, padding='same')( + h_conv2))) # batch_size x (board_x) x (board_y) x num_channels + h_conv4 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(args.num_channels, 3, padding='valid')( + h_conv3))) # batch_size x (board_x-2) x (board_y-2) x num_channels + h_conv4_flat = Flatten()(h_conv4) + s_fc1 = Dropout(args.dropout)( + Activation('relu')(BatchNormalization(axis=1)(Dense(1024)(h_conv4_flat)))) # batch_size x 1024 + s_fc2 = Dropout(args.dropout)( + Activation('relu')(BatchNormalization(axis=1)(Dense(512)(s_fc1)))) # batch_size x 1024 + self.pi = Dense(self.action_size, activation='softmax', name='pi')(s_fc2) # batch_size x self.action_size + self.v = Dense(1, activation='tanh', name='v')(s_fc2) # batch_size x 1 + + self.model = Model(inputs=self.input_boards, outputs=[self.pi, self.v]) + self.model.compile(loss=['categorical_crossentropy', 'mean_squared_error'], optimizer=Adam(args.lr)) diff --git a/ultimate_tictactoe/keras/__init__.py b/ultimate_tictactoe/keras/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ultimate_tictactoe/uttt_main.py b/ultimate_tictactoe/uttt_main.py new file mode 100644 index 000000000..0557cfa4e --- /dev/null +++ b/ultimate_tictactoe/uttt_main.py @@ -0,0 +1,52 @@ +import os + +os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" + +from Coach import Coach +from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame +from ultimate_tictactoe.keras.NNet import NNetWrapper as nn +from utils import * + +print('Loading %s...', UltimateTicTacToeGame.__name__) + +g = UltimateTicTacToeGame() + +print('Loading %s...', nn.__name__) +nnet = nn(g) +# + +# log.info('Loading the Coach...') + +args = dotdict({ + 'numIters': 15, + 'numEps': 100, # Number of complete self-play games to simulate during a new iteration. + 'tempThreshold': 15, + 'updateThreshold': 0.6, # During arena playoff, new neural net will be accepted if threshold or more of games + # are won. + 'maxlenOfQueue': 2000, # Number of game examples to train the neural networks. + 'numMCTSSims': 25, # Number of games moves for MCTS to simulate. + 'arenaCompare': 10, # Number of games to play during arena play to determine if new net will be accepted. 
diff --git a/ultimate_tictactoe/uttt_main.py b/ultimate_tictactoe/uttt_main.py
new file mode 100644
index 000000000..0557cfa4e
--- /dev/null
+++ b/ultimate_tictactoe/uttt_main.py
@@ -0,0 +1,52 @@
+import os
+
+os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
+
+from Coach import Coach
+from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame
+from ultimate_tictactoe.keras.NNet import NNetWrapper as nn
+from utils import *
+
+print('Loading %s...' % UltimateTicTacToeGame.__name__)
+g = UltimateTicTacToeGame()
+
+print('Loading %s...' % nn.__name__)
+nnet = nn(g)
+
+args = dotdict({
+    'numIters': 15,
+    'numEps': 100,              # Number of complete self-play games to simulate during a new iteration.
+    'tempThreshold': 15,
+    'updateThreshold': 0.6,     # During arena playoff, the new net is accepted if it wins this fraction of games or more.
+    'maxlenOfQueue': 2000,      # Max number of game examples kept for training the neural network.
+    'numMCTSSims': 25,          # Number of game moves for MCTS to simulate.
+    'arenaCompare': 10,         # Number of games to play during arena play to determine if new net will be accepted.
+    'cpuct': 1,
+
+    'checkpoint': './temp/',
+    'load_model': False,
+    'load_folder_file': ('./pretrained_models/ultimate_tictactoe/keras/',
+                         'ultimate_tictactoe_100_eps_10_epoch_checkpoint_3.h5'),
+    'numItersForTrainExamplesHistory': 20,
+})
+
+if args.load_model:
+    print('Loading checkpoint "%s/%s"...' % args.load_folder_file)
+    nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
+else:
+    print('Not loading a checkpoint!')
+
+c = Coach(g, nnet, args)
+
+if args.load_model:
+    print("Loading 'trainExamples' from file...")
+    c.loadTrainExamples()
+
+print('Starting the learning process 🎉')
+c.learn()
diff --git a/ultimate_tictactoe/uttt_pit.py b/ultimate_tictactoe/uttt_pit.py
new file mode 100644
index 000000000..7efd62ac7
--- /dev/null
+++ b/ultimate_tictactoe/uttt_pit.py
@@ -0,0 +1,54 @@
+import os
+
+os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
+
+import numpy as np
+
+import Arena
+from MCTS import MCTS
+from ultimate_tictactoe.UltimateTicTacToeGame import UltimateTicTacToeGame
+from ultimate_tictactoe.UltimateTicTacToePlayers import (HumanUltimateTicTacToePlayer,
+                                                         RandomUltimateTictacToePlayer)
+from ultimate_tictactoe.keras.NNet import NNetWrapper as NNet
+from utils import *
+
+"""
+use this script to play any two agents against each other, or play manually with
+any agent.
+"""
+
+random_vs_cpu = False   # pit the trained net against a random player
+human_vs_cpu = False    # pit the trained net against a human player
+num_games = 20          # if both flags are False, two nets play each other
+
+g = UltimateTicTacToeGame()
+
+# all players
+rp = RandomUltimateTictacToePlayer(g).play
+hp = HumanUltimateTicTacToePlayer(g).play
+
+# nnet players
+n1 = NNet(g)
+n1.load_checkpoint('./pretrained_models/ultimate_tictactoe/keras/',
+                   'ultimate_tictactoe_100_eps_20_epoch_checkpoint_9.h5')
+args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
+mcts1 = MCTS(g, n1, args1)
+n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
+
+if human_vs_cpu:
+    player2 = hp
+elif random_vs_cpu:
+    player2 = rp
+else:
+    n2 = NNet(g)
+    n2.load_checkpoint('./pretrained_models/ultimate_tictactoe/keras/',
+                       'ultimate_tictactoe_100_eps_10_epoch_checkpoint_3.h5')
+    args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
+    mcts2 = MCTS(g, n2, args2)
+    n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
+
+    player2 = n2p  # player 2 is a second neural network if it's cpu vs cpu
+
+arena = Arena.Arena(player1=n1p, player2=player2, game=g, display=UltimateTicTacToeGame.display)
+
+print(arena.playGames(num_games, verbose=False))
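To play against the trained checkpoint yourself, flip the flags at the top of `uttt_pit.py`; with a human in the loop you will likely also want `verbose=True` so each position is displayed (a usage sketch):

```python
human_vs_cpu = True   # enter moves as "row col"; a row of -1 selects the pass action
random_vs_cpu = False
num_games = 2

print(arena.playGames(num_games, verbose=True))
```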