-
Notifications
You must be signed in to change notification settings - Fork 0
/
connect4.py
512 lines (487 loc) · 28 KB
/
connect4.py
1
import numpy as npimport randomrand = random.Random()class Connect_Four: # Constants to building the game board and the PSO iterations/particles NUM_ROW = 6 NUM_COLUMN = 7 NUM_WINNER = 4 NUM_PARTICLE = 200 NUM_ITERATION = 200 def __init__(self): # Base game board, filled with 0s (which the game interprets as empty spaces) self.board = np.zeros((self.NUM_ROW, self.NUM_COLUMN), dtype=int) # Fitness board is the gameboard with preference scores attached to them. By default, the numbers seen # here relate to how many ways the user can win with that piece there (for instance, if # it played at the 7 at the bottom, there are seven ways to win with that). This means it is likely # that the agent will play there if it goes first, to maximize chances of winning. self.fitness = np.array([[3, 4, 5, 7, 5, 4, 3], [4, 6, 8, 10, 8, 6, 4], [5, 8, 11, 13, 11, 8, 5], [5, 8, 11, 13, 11, 8, 5], [4, 6, 8, 10, 8, 6, 4], [3, 4, 5, 7, 5, 4, 3]], dtype=int) # Copy the fitness table so that it can be used in other areas without altering the above after we want to. self.fitness_copy = self.fitness.copy() self.particle = [] # This function allows us and users to see the Connect-Four gaming board. def __str__(self): string_board = "\n\n" + str(self.board).replace("0", "_").replace("-1", " O").replace("1", "X") string_board = string_board.replace("[", " ").replace("]", " ") return string_board # Function to determine what are our legal moves; sees high use for a full base game. def moves_available(self): return [m for m in range(self.NUM_COLUMN) if self.board[0][m] == 0] # Another function for when it will make the move on a live game board. For when we make the full game. def make_move(self, move): if np.sum(self.board) == 0: player = 1 else: player = -1 j = 0 while j + 1 < self.NUM_ROW and self.board[j + 1][move] == 0: j += 1 self.board[j][move] = player # For when we play a live game, the class needs to find out if the latest move determined the winner of the game. def decide_winner(self): for i in range(self.NUM_ROW - self.NUM_WINNER + 1): for j in range(self.NUM_COLUMN - self.NUM_WINNER + 1): subboard = self.board[i:i + self.NUM_WINNER, j:j + self.NUM_WINNER] if np.max(np.abs(np.sum(subboard, 0))) == self.NUM_WINNER: return True if np.max(np.abs(np.sum(subboard, 1))) == self.NUM_WINNER: return True elif np.abs(sum([subboard[k, k] for k in range(self.NUM_WINNER)])) == self.NUM_WINNER: return True elif np.abs( sum([subboard[k, self.NUM_WINNER - 1 - k] for k in range(self.NUM_WINNER)])) == self.NUM_WINNER: return True return False # Given a main() game board, it fits it to the class' own game board. def generate_random_board(self, board): self.board = board def fitness_eval(self, board): board = self.board for row in range(0, self.NUM_ROW): for col in range(0, self.NUM_COLUMN): # This if statement is the "How to not lose" function. It first considers the ways any of the enemy # player's pieces could win. If there are a three-string of enemy pieces in any direction, then it # will allocate a supreme weight to the final fourth position, so that it will play at a place where # it will ensure it won't lose that way. if board[row][col] == 1: if row == 0 or row == 1 or row == 2: if col == 0 or col == 1 or col == 2: # if statements for when row++, col++ (i.e. piece goes down and right on the board) if board[row + 1][col] == 1: if board[row + 2][col] == 1: self.fitness[row + 3][col] = 5000 if board[row][col + 1] == 1: if board[row][col + 2] == 1: self.fitness[row][col + 3] = 5000 if board[row + 1][col + 1] == 1: if board[row + 2][col + 2] == 1: self.fitness[row + 3][col + 3] = 5000 if col == 3: # if statements for when row++. col++/-- if board[row + 1][col] == 1: if board[row + 2][col] == 1: self.fitness[row + 3][col] = 5000 if board[row][col + 1] == 1: if board[row][col + 2] == 1: self.fitness[row][col + 3] = 5000 if board[row + 1][col + 1] == 1: if board[row + 2][col + 2] == 1: self.fitness[row + 3][col + 3] = 5000 if board[row + 1][col - 1] == 1: if board[row + 2][col - 2] == 1: self.fitness[row + 3][col - 3] = 5000 if board[row][col - 1] == 1: if board[row][col - 2] == 1: self.fitness[row][col - 3] = 5000 if col == 4 or col == 5 or col == 6: # if statements for when row++, col-- if board[row + 1][col] == 1: if board[row + 2][col] == 1: self.fitness[row + 3][col] = 5000 if board[row + 1][col - 1] == 1: if board[row + 2][col - 2] == 1: self.fitness[row + 3][col - 3] = 5000 if board[row][col - 1] == 1: if board[row][col - 2] == 1: self.fitness[row][col - 3] = 5000 if row == 3 or row == 4 or row == 5: if col == 0 or col == 1 or col == 2: # if statements for when row--, col++ if board[row - 1][col + 1] == 1: if board[row - 2][col + 2] == 1: self.fitness[row - 3][col + 3] = 5000 if board[row][col + 1] == 1: if board[row][col + 2] == 1: self.fitness[row][col + 3] = 5000 if board[row - 1][col] == 1: if board[row - 2][col] == 1: self.fitness[row - 3][col] = 5000 if col == 3: # if statements for when row--. col++/-- if board[row - 1][col - 1] == 1: if board[row - 2][col - 2] == 1: self.fitness[row - 3][col - 3] = 5000 if board[row - 1][col + 1] == 1: if board[row - 2][col + 2] == 1: self.fitness[row - 3][col + 3] = 5000 if board[row - 1][col] == 1: if board[row - 2][col] == 1: self.fitness[row - 3][col] = 5000 if board[row][col - 1] == 1: if board[row][col - 2] == 1: self.fitness[row][col - 3] = 5000 if board[row][col + 1] == 1: if board[row][col + 2] == 1: self.fitness[row][col + 3] = 5000 if col == 4 or col == 5 or col == 6: # if statements for when row--, col-- if board[row - 1][col - 1] == 1: if board[row - 2][col - 2] == 1: self.fitness[row - 3][col - 3] = 5000 if board[row - 1][col] == 1: if board[row - 2][col] == 1: self.fitness[row - 3][col] = 5000 if board[row][col - 1] == 1: if board[row][col - 2] == 1: self.fitness[row][col - 3] = 5000 # Now that it ensured it won't lose (or lose one way), it will now consider how to win. It detects # its own pieces and if it has any two-strings. If it does, then it will allocate a intermediate # weight to putting a third consecutive piece. If that 3rd spot is already filled by its own piece, # then it will put an even bigger weight behind the 4th. if board[row][col] == -1: if row == 0 or row == 1 or row == 2: if col == 0 or col == 1 or col == 2: # if statements for when row++, col++ if board[row + 1][col] == -1: if self.fitness[row + 2][col] < 1000: self.fitness[row + 2][col] = 1000 if board[row + 2][col] == -1 and self.fitness[row + 3][col] != 5000: self.fitness[row + 3][col] = 6000 if board[row][col + 1] == -1: if self.fitness[row][col + 2] < 1000: self.fitness[row][col + 2] = 1000 if board[row][col + 2] == -1 and self.fitness[row][col + 3] != 5000: self.fitness[row][col + 3] = 6000 if board[row + 1][col + 1] == -1: if self.fitness[row + 2][col + 2] < 1000: self.fitness[row + 2][col + 2] = 1000 if board[row + 2][col + 2] == -1 and self.fitness[row + 3][col + 3] != 5000: self.fitness[row + 3][col + 3] = 6000 if col == 3: # if statements for when row++. col++/-- if board[row + 1][col] == -1: if self.fitness[row + 2][col] < 1000: self.fitness[row + 2][col] = 1000 if board[row + 2][col] == -1 and self.fitness[row + 3][col] != 5000: self.fitness[row + 3][col] = 6000 if board[row][col + 1] == -1: if self.fitness[row][col + 2] < 1000: self.fitness[row][col + 2] = 1000 if board[row][col + 2] == -1 and self.fitness[row][col + 3] != 5000: self.fitness[row][col + 3] = 6000 if board[row + 1][col + 1] == -1: if self.fitness[row + 2][col + 2] < 1000: self.fitness[row + 2][col + 2] = 1000 if board[row + 2][col + 2] == -1 and self.fitness[row + 3][col + 3] != 5000: self.fitness[row + 3][col + 3] = 6000 if board[row + 1][col - 1] == -1: if self.fitness[row + 2][col - 2] < 1000: self.fitness[row + 2][col - 2] = 1000 if board[row + 2][col - 2] == -1 and self.fitness[row + 3][col - 3] != 5000: self.fitness[row + 3][col - 3] = 6000 if board[row][col - 1] == -1: if self.fitness[row][col - 2] < 1000: self.fitness[row][col - 2] = 1000 if board[row][col - 2] == -1 and self.fitness[row][col - 3] != 5000: self.fitness[row][col - 3] = 6000 if col == 4 or col == 5 or col == 6: # if statements for when row++, col-- if board[row + 1][col] == -1: if self.fitness[row + 2][col] < 1000: self.fitness[row + 2][col] = 1000 if board[row + 2][col] == -1 and self.fitness[row + 3][col] != 5000: self.fitness[row + 3][col] = 6000 if board[row + 1][col - 1] == -1: if self.fitness[row + 2][col - 2] < 1000: self.fitness[row + 2][col - 2] = 1000 if board[row + 2][col - 2] == -1 and self.fitness[row + 3][col - 3] != 5000: self.fitness[row + 3][col - 3] = 6000 if board[row][col - 1] == -1: if self.fitness[row][col - 2] < 1000: self.fitness[row][col - 2] = 1000 if board[row][col - 2] == -1 and self.fitness[row][col - 3] != 5000: self.fitness[row][col - 3] = 6000 if row == 3 or row == 4 or row == 5: if col == 0 or col == 1 or col == 2: # if statements for when row--, col++ if board[row - 1][col + 1] == -1: if self.fitness[row - 2][col + 2] < 1000: self.fitness[row - 2][col + 2] = 1000 if board[row - 2][col + 2] == -1 and self.fitness[row - 3][col + 3] != 5000: self.fitness[row - 3][col + 3] = 6000 if board[row][col + 1] == -1: if self.fitness[row][col + 2] < 1000: self.fitness[row][col + 2] = 1000 if board[row][col + 2] == -1 and self.fitness[row][col + 3] != 5000: self.fitness[row][col + 3] = 6000 if board[row - 1][col] == -1: if self.fitness[row - 2][col] < 1000: self.fitness[row - 2][col] = 1000 if board[row - 2][col] == -1 and self.fitness[row - 3][col] != 5000: self.fitness[row - 3][col] = 6000 if col == 3: # if statements for when row--. col++/-- if board[row - 1][col - 1] == -1: if self.fitness[row - 2][col - 2] < 1000: self.fitness[row - 2][col - 2] = 1000 if board[row - 2][col - 2] == -1 and self.fitness[row - 3][col - 3] != 5000: self.fitness[row - 3][col - 3] = 6000 if board[row - 1][col + 1] == -1: if self.fitness[row - 2][col + 2] < 1000: self.fitness[row - 2][col + 2] = 1000 if board[row - 2][col + 2] == -1 and self.fitness[row - 3][col + 3] != 5000: self.fitness[row - 3][col + 3] = 6000 if board[row - 1][col] == -1: if self.fitness[row - 2][col] < 1000: self.fitness[row - 2][col] = 1000 if board[row - 2][col] == -1 and self.fitness[row - 3][col] != 5000: self.fitness[row - 3][col] = 6000 if board[row][col - 1] == -1: if self.fitness[row][col - 2] < 1000: self.fitness[row][col - 2] = 1000 if board[row][col - 2] == -1 and self.fitness[row][col - 3] != 5000: self.fitness[row][col - 3] = 6000 if board[row][col + 1] == -1: if self.fitness[row][col + 2] < 1000: self.fitness[row][col + 2] = 1000 if board[row][col + 2] == -1 and self.fitness[row][col + 3] != 5000: self.fitness[row][col + 3] = 6000 if col == 4 or col == 5 or col == 6: # if statements for when row--, col-- if board[row - 1][col - 1] == -1: if self.fitness[row - 2][col - 2] < 1000: self.fitness[row - 2][col - 2] = 1000 if board[row - 2][col - 2] == -1 and self.fitness[row - 3][col - 3] != 5000: self.fitness[row - 3][col - 3] = 6000 if board[row - 1][col] == -1: if self.fitness[row - 2][col] < 1000: self.fitness[row - 2][col] = 1000 if board[row - 2][col] == -1 and self.fitness[row - 3][col] != 5000: self.fitness[row - 3][col] = 6000 if board[row][col - 1] == -1: if self.fitness[row][col - 2] < 1000: self.fitness[row][col - 2] = 1000 if board[row][col - 2] == -1 and self.fitness[row][col - 3] != 5000: self.fitness[row][col - 3] = 6000 # Final evaluation that returns the base evaluation scores if the pieces it allocated high weights to # are not reachable, whether due to already being occupied or there are empty spaces below them. for row in range(0, self.NUM_ROW): for col in range(0, self.NUM_COLUMN): if board[row][col] == 0: if row != 5: if board[row + 1][col] == 0: self.fitness[row][col] = self.fitness_copy[row][col] if row == 5: if board[row][col] == 0: self.fitness[row][col] = self.fitness_copy[row][col] elif board[row][col] == 1 or board[row][col] == -1: self.fitness[row][col] = self.fitness_copy[row][col] for col in range(0, self.NUM_COLUMN): if board[0][col] != 0: for row in range(0, self.NUM_ROW): self.fitness[row][col] = 0 temp_best_val = -1 temp_best_col = 0 for row in range(0, self.NUM_ROW): for col in range(0, self.NUM_COLUMN): if self.fitness[row][col] > temp_best_val: temp_best_val = self.fitness[row][col] temp_best_col = col # This print statement is the evaluation's answer, which is the one PSO should be looking for # NOTE: PSO may not always find this answer depending on chance, number of iterations, and number of # particles in the swarm. print("From evaluation: best move is ", temp_best_col) string_fitness = "\n\n" + str(self.fitness) string_fitness = string_fitness.replace("[", " ").replace("]", " ") return string_fitness def PSO(self): # The PSO algorithm we implemented. We start with the constants involved with updating particle velocity. c1 = 2 c2 = 2 r1 = rand.random() r2 = rand.random() # Initialize the particles to appear randomly on the game board. for i in range(0, self.NUM_PARTICLE): coord = [rand.choice(range(0, self.NUM_ROW)), rand.choice(range(0, self.NUM_COLUMN))] self.particle.append([coord, int(self.fitness[coord[0]][coord[1]]), [0, 0], coord]) for i in range(0, self.NUM_ITERATION): for p in range(0, len(self.particle)): if self.fitness[self.particle[p][0][0]][self.particle[p][0][1]] > self.particle[p][1]: self.particle[p][1] = int(self.fitness[self.particle[p][0][0]][self.particle[p][0][1]]) self.particle[p][3] = [self.particle[p][0][0], self.particle[p][0][1]] max_fitness = 0 gbest = [] # Finding pBest. for p in range(0, self.NUM_PARTICLE): if self.particle[p][1] > max_fitness: max_fitness = self.particle[p][1] gbest = self.particle[p] for p in range(0, self.NUM_PARTICLE): # This function updates the velocity. for x in range(0, 2): self.particle[p][2][x] = int(self.particle[p][2][x] \ + c1 * r1 * (self.particle[p][3][x] - self.particle[p][0][x]) \ + c2 * r2 * (gbest[0][x] - self.particle[p][0][x])) self.particle[p][0][x] = int(self.particle[p][0][x] + self.particle[p][2][x]) # These if statements are for when the updated velocities result in the particles moving off the # game board; if and when it does, these if statements will nudge them back to the boundaries. if self.particle[p][0][0] > 5: self.particle[p][0][0] = 5 if self.particle[p][0][1] > 6: self.particle[p][0][1] = 6 if self.particle[p][0][0] < 0: self.particle[p][0][0] = 0 if self.particle[p][0][1] < 0: self.particle[p][0][1] = 0 # print gbest print("\nPSO best move: ", gbest[3][1], "\n")def main(): # The main statement makes 10 prearranged game boards, handing it off to the Connect_Four class and has it # find the best move through the evaluation function and through PSO w/ the evaluations of fitness. # The objective is that both of their answers are the same. print("O is AI player, X is enemy of AI") my_game = Connect_Four() board1 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0], [0, 1, 0, -1, 0, 0, 0], [0, 1, -1, -1, -1, 0, 0], [1, 1, 1, -1, -1, -1, 0]], dtype=int) my_game.generate_random_board(board1) print(my_game) print(my_game.fitness_eval(board1)) my_game.PSO() # We must call for a new construction of Connect_Four, hence these same lines. the print statement is also # repeated so that users can determine what is the next game board better. my_game = Connect_Four() print("O is AI player, X is enemy of AI") board2 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 1, -1, 0, 0, 0], [0, 0, 1, -1, 0, 0, 0], [0, 0, 1, -1, 0, 0, 0]], dtype=int) my_game.generate_random_board(board2) print(my_game) print(my_game.fitness_eval(board2)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board3 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 1, -1, -1, 1, 0], [0, 0, 1, -1, 1, -1, 0], [0, 0, -1, 1, 1, -1, 0]], dtype=int) my_game.generate_random_board(board3) print(my_game) print(my_game.fitness_eval(board3)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board4 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0]], dtype=int) my_game.generate_random_board(board4) print(my_game) print(my_game.fitness_eval(board4)) my_game.PSO() my_game = Connect_Four() board5 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1], [-1, 0, 0, 0, 0, 0, 1], [-1, 0, 0, 0, 0, 0, 1]], dtype=int) my_game.generate_random_board(board5) print(my_game) print(my_game.fitness_eval(board5)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board6 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1], [-1, -1, -1, 0, 0, 0, 1], [-1, -1, 1, 1, 0, 1, 1]], dtype=int) my_game.generate_random_board(board6) print(my_game) print(my_game.fitness_eval(board6)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board7 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, -1, 0, 0, 0, 0, -1], [1, 1, -1, 0, 0, -1, 1], [-1, -1, 1, -1, 1, 1, 1], [-1, -1, 1, -1, 1, -1, 1]], dtype=int) my_game.generate_random_board(board7) print(my_game) print(my_game.fitness_eval(board7)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board8 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, -1, -1, 1, 0, 0, -1], [1, 1, -1, 1, 0, -1, 1], [-1, -1, 1, -1, 1, 1, 1], [-1, -1, 1, -1, 1, -1, 1]], dtype=int) my_game.generate_random_board(board8) print(my_game) print(my_game.fitness_eval(board8)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board9 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 0], [-1, -1, -1, 1, 0, 0, -1], [1, 1, -1, 1, 0, -1, 1], [-1, -1, 1, -1, 1, 1, 1], [-1, -1, 1, -1, 1, -1, 1]], dtype=int) my_game.generate_random_board(board9) print(my_game) print(my_game.fitness_eval(board9)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board10 = np.array([[0., 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [1, -1, -1, 1, 0, 1, -1], [1, 1, -1, 1, -1, -1, 1], [-1, -1, 1, -1, 1, 1, 1], [-1, -1, 1, -1, 1, -1, 1]], dtype=int) my_game.generate_random_board(board10) print(my_game) print(my_game.fitness_eval(board10)) my_game.PSO() my_game = Connect_Four() print("O is AI player, X is enemy of AI") board11 = np.array([[0., 0, -1, 1, 0, 0, 0], [0, 0, -1, 1, 0, 0, 0], [0, 0, 1, -1, 0, 0, 0], [0, 0, -1, 1, 0, 0, 0], [0, 0, -1, 1, 0, 0, 0], [0, 0, -1, 1, 0, 0, 0]], dtype=int) my_game.generate_random_board(board11) print(my_game) print(my_game.fitness_eval(board11)) my_game.PSO()if __name__ == "__main__": main()