This repository has been archived by the owner on May 2, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
play_tictactoe_az.py
66 lines (52 loc) · 2.27 KB
/
play_tictactoe_az.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import logging
import os
import random
import sys
import torch
import yaml
from alphazero.agents.alphazero import AlphaZeroArgMaxAgent
from alphazero.alphazero.mcts import MonteCarloTreeSearch
from alphazero.alphazero.nn_modules.nets import dual_resnet
from alphazero.alphazero.state_encoders.ttt_state_encoder import TicTacToeStateEncoder
from alphazero.games.tictactoe import TicTacToeGame, TicTacToePlayer, TicTacToeMove
FORMAT = '%(asctime)s - %(name)-15s - %(levelname)s - %(message)s'
logging.basicConfig(stream=sys.stderr, level=logging.INFO,
format=FORMAT, datefmt='%m/%d/%Y %I:%M:%S %p')
with open('tictactoe.yaml', 'r') as f:
config = yaml.safe_load(f)
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
def read_move(player: TicTacToePlayer) -> TicTacToeMove:
x, y = input(f"{player.name} move: ").split()
x, y = int(x), int(y)
return TicTacToeMove(x, y)
if __name__ == '__main__':
game = TicTacToeGame(config['game_size'])
state_encoder = TicTacToeStateEncoder(config['device'])
net = dual_resnet(game, config)
mcts = MonteCarloTreeSearch(game=game,
state_encoder=state_encoder,
nn=net,
config=config)
net.load_state_dict(torch.load(os.path.join('pretrained', 'ttt_dualres_comp.pth')))
# net.load_state_dict(torch.load(os.path.join(config['log_dir'], 'best.pth')))
net.eval()
agent = AlphaZeroArgMaxAgent(game, state_encoder, net, config)
agent_role = random.choice([TicTacToePlayer.X, TicTacToePlayer.O])
while not game.is_over:
game.show_board()
# print(f"current state score by eval func: {agent.eval_fn(game.state, agent.player)}")
if game.current_player == agent_role:
move = read_move(game.current_player)
while not game.state.is_legal_move(move):
print("Illegal move, try again")
move = read_move(game.current_player)
else:
move = agent.select_move(game.state)
print(f"Agent {game.current_player!s} move: {move}")
game.play(move)
print("--- GAME OVER ---")
game.show_board()
if game.winner:
print(f"{game.winner.name} wins!")
else:
print("tie :(")