Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remaining MTD algorithms & Introducing Monte Carlo Tree Search #13

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions easyAI/AI/DUAL.py
Original file line number Diff line number Diff line change
@@ -61,8 +61,8 @@ def __call__(self,game):
scoring = self.scoring if self.scoring else (
lambda g: g.scoring() ) # horrible hack

first = -self.win_score #essence of DUAL algorithm
next = (lambda lowerbound, upperbound, bestValue: bestValue + 1)
first = (lambda game, tt: -self.win_score) #essence of DUAL algorithm
next = (lambda lowerbound, upperbound, bestValue, bound: bestValue + 1)

self.alpha = mtd(game,
first, next,
24 changes: 16 additions & 8 deletions easyAI/AI/DictTT.py
Original file line number Diff line number Diff line change
@@ -13,21 +13,22 @@ def __init__(self, num_buckets=1024, own_hash = None):
self.dict = []
for i in range(num_buckets):
self.dict.append((None, None))
self.keys = dict()
#self.keys = dict()
self.hash = hash
if own_hash != None:
own_hash.modulo = len(self.dict)
self.hash = own_hash.get_hash
self.num_collisions = 0
self.num_calls = 0
self.num_lookups = 0

def hash_key(self, key):
    """
    Map ``key`` to a bucket index of ``self.dict``.

    The key is hashed with ``self.hash`` and reduced modulo the number
    of buckets.  Every call is counted in ``self.num_calls``.
    """
    self.num_calls += 1
    # Modulo, not ``& (len(self.dict) - 1)``: the bit mask only reaches
    # every bucket when the table size is a power of two, while modulo is
    # valid for any ``num_buckets`` and gives the same index in the
    # power-of-two case.
    return self.hash(key) % len(self.dict)

def get_slot(self, key, default=None):
"""
@@ -44,7 +45,8 @@ def get_slot(self, key, default=None):
def get(self, key, default=None):
    """
    Gets the value for the given key, or the default.

    Each call is counted in ``self.num_lookups``.  The lookup itself is
    delegated to ``get_slot`` (``default`` is forwarded to it), and the
    third element of the tuple it returns is handed back.
    """
    self.num_lookups += 1
    # Only the value part of the (index, key, value) triple is needed here.
    _, _, value = self.get_slot(key, default=default)
    return value

@@ -59,10 +61,10 @@ def set(self, key, value):

self.dict[slot] = (key, value)

if self.keys.__contains__(key):
self.keys[key] = self.keys[key] + 1
else:
self.keys[key] = 1
#if self.keys.__contains__(key):
# self.keys[key] = self.keys[key] + 1
#else:
# self.keys[key] = 1

def delete(self, key):
"""
@@ -97,4 +99,10 @@ def __iter__(self):

def __contains__(self, key):
    # Membership is answered purely from the ``self.keys`` dictionary.
    # NOTE(review): this needs ``self.keys`` to exist.  The assignment in
    # the constructor and the bookkeeping in ``set`` look commented out in
    # this change; if so, ``key in table`` raises AttributeError -- confirm,
    # and either restore ``self.keys`` or test the buckets directly.
    return self.keys.__contains__(key)


def print_stats(self):
    """
    Print the table's usage counters to standard output.

    Reports ``num_calls`` (hash computations), ``num_collisions`` and
    ``num_lookups``.
    """
    # Each line is formatted before printing: ``print ('label', value)``
    # prints a tuple under Python 2, whereas a single pre-formatted string
    # gives the same output on Python 2 and Python 3.
    report = (
        '-' * 10,
        'Statistics of custom dictionary:',
        'Calls of hash:  %s' % (self.num_calls,),
        'Collisions:  %s' % (self.num_collisions,),
        'Num lookups:  %s' % (self.num_lookups,),
    )
    for line in report:
        print(line)
5 changes: 3 additions & 2 deletions easyAI/AI/HashTT.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@ def before(self, key):
Returns initial value of hash.
It's also the place where you can initialize some auxiliary variables
"""
return 0
return 1

def after(self, key, hash):
"""
@@ -25,7 +25,8 @@ def get_hash(self, key, depth = 0):
"""
Recursively computes a hash
"""
ret_hash = self.before(key)
if depth == 0:
ret_hash = self.before(key)
if type(key) is int:
return self.hash_int(key)
if type(key) is str and len(key) <= 1:
2 changes: 1 addition & 1 deletion easyAI/AI/Hashes.py
Original file line number Diff line number Diff line change
@@ -66,7 +66,7 @@ def before(self, key):
return 0
def join(self, one, two):
one = (one << 4) + two;
self.g = one & 0xf0000000L;
self.g = one & 0xf0000000;

if self.g != 0:
one ^= self.g >> 24
130 changes: 130 additions & 0 deletions easyAI/AI/MCTS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#contributed by mrfesol (Tomasz Wesolowski)

import random
from math import sqrt, log

class MCTS:
    """
    This implements Monte Carlo Tree Search algorithm.
    More information at: http://mcts.ai/index.html
    The following example shows
    how to setup the AI and play a Connect Four game:

    >>> from easyAI import Human_Player, AI_Player, MCTS
    >>> AI = MCTS(iterations=5000)
    >>> game = ConnectFour([AI_Player(AI),Human_Player()])
    >>> game.play()

    Parameters
    -----------

    iterations:
      Indicates how many iterations the algorithm should perform.
      Larger value = More accurate result

    winscore:
      The largest score of the game. Scores are divided by it to
      obtain a reward in [0, 1].

    depth:
      Maximum number of moves played from the root in one iteration
      (selection + expansion + random rollout). Stored as ``max_depth``.
      (2 moves = 1 complete turn)

    expand_factor:
      Weight of the exploration term of the UCB1 formula, i.e. how
      willing the algorithm is to visit rarely tried nodes.
      Usually between 0.3 and 1.0

    scoring:
      A function f(game)-> score. If no scoring is provided
      and the game object has a ``scoring`` method it will be used.
      Scoring function MUST return values from interval [0, winscore]
      and is read as the score of the player about to move.

    """

    def __init__(self, iterations = 5000, winscore=100, depth = 20, expand_factor=0.3, scoring=None):
        self.scoring = scoring
        self.iterations = iterations
        self.winscore = winscore
        self.max_depth = depth
        self.expand_factor = expand_factor

    def __call__(self,game):
        """
        Returns the AI's best move given the current state of the game.

        The move returned is the one of the most visited child of the
        root. Raises IndexError when the root ends up without children
        (no possible move, or ``iterations`` is 0).
        """
        rootnode = MCTSNode(state = game)

        scoring = self.scoring if self.scoring else (
                       lambda g: g.scoring() ) # horrible hack

        for _ in range(self.iterations):
            node = rootnode
            state = game.copy()
            depth = 0

            # Select: walk down fully expanded nodes following UCB1.
            while not node.untried and node.children:
                node = node.select_child(self.expand_factor)
                state.make_move(node.move)
                state.switch_player()
                depth += 1

            # Expand: try one not yet explored move of this node.
            if node.untried:
                m = random.choice(node.untried)
                state.make_move(m)
                state.switch_player()
                node = node.add_child(m, state)
                # The expansion move is a played move like any other and
                # therefore counts towards ``max_depth``.
                depth += 1

            # Rollout: random play until the end or the depth limit.
            rollout_moves = 0
            while depth < self.max_depth:
                moves = state.possible_moves()
                if not moves:
                    break
                state.make_move(random.choice(moves))
                state.switch_player()
                depth += 1
                rollout_moves += 1

            # Backpropagate
            # float() keeps the ratio fractional with integer scores on
            # Python 2 as well.
            value = max(0, float(scoring(state)) / self.winscore)
            # ``value`` belongs to the player to move in ``state``; a node
            # stores the reward of the player who moved INTO it. Before the
            # rollout these are opposite players, and every rollout move
            # swaps the player to move, so after an odd number of rollout
            # moves the value already belongs to the node's player.
            score = value if rollout_moves % 2 else 1 - value
            while node is not None:
                node.update(score)
                node = node.parent
                score = 1 - score

        rootnode.children.sort(key = lambda c: c.visits)
        return rootnode.children[-1].move


class MCTSNode:
    """
    One node of the search tree built by ``MCTS``.

    ``wins`` / ``visits`` accumulate the rewards passed to ``update``;
    ``untried`` lists the moves of this position that have no child yet
    and ``last_player`` is ``state.nopponent`` at creation time.
    """

    def __init__(self, move = None, parent = None, state = None):
        self.move = move
        self.parent = parent
        self.children = []
        self.wins = 0.0
        self.visits = 0.0
        # Private copy: ``add_child`` removes entries from this list and
        # must never alter a list owned by the game object.
        self.untried = list(state.possible_moves())
        self.last_player = state.nopponent

    def formula(self):
        """ Average reward of the node (0.0 while it is unvisited). """
        if not self.visits:
            return 0.0
        return self.wins/self.visits

    def formula_exp(self, expand_factor = 0.3):
        """ Exploration term of UCB1 for this node.

        Returns 0.0 for a node without (visited) parent and infinity
        for a node that was never visited itself.
        """
        if self.parent is None or self.parent.visits < 1:
            return 0.0
        if not self.visits:
            return float('inf')
        return expand_factor*sqrt(2*log(self.parent.visits)/self.visits)

    def select_child(self, expand_factor):
        """ Using the UCB1 formula to select a child node.
        """
        def ucb1(c):
            return c.wins/c.visits + \
                   expand_factor*sqrt(2*log(self.visits)/c.visits)
        # Scanning in reverse makes the latest child win ties, exactly as
        # taking the last element of a stable sort would.
        return max(reversed(self.children), key = ucb1)

    def add_child(self, m, s):
        """ Create the child reached by move ``m`` (resulting state ``s``),
        drop ``m`` from the untried moves and return the new node.
        """
        n = MCTSNode(move = m, parent = self, state = s)
        self.untried.remove(m)
        self.children.append(n)
        return n

    def update(self, result):
        """ Record one more visit with reward ``result``. """
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return "[P: " + str(self.last_player) + " M:" + str(self.move) + \
               " W/V:" + str(self.wins) + "/" + str(self.visits) + " F: " + \
               str(self.formula()) + " F_exp: " + str(self.formula_exp()) + "]"
73 changes: 73 additions & 0 deletions easyAI/AI/MTDbi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#contributed by mrfesol (Tomasz Wesolowski)

from easyAI.AI.MTdriver import mtd

class MTDbi:
    """
    This implements MTD-bi algorithm. The following example shows
    how to setup the AI and play a Connect Four game:

    >>> from easyAI import Human_Player, AI_Player, MTDbi
    >>> AI = MTDbi(7)
    >>> game = ConnectFour([AI_Player(AI),Human_Player()])
    >>> game.play()

    Parameters
    -----------

    depth:
      How many moves in advance should the AI think ?
      (2 moves = 1 complete turn)

    scoring:
      A function f(game)-> score. If no scoring is provided
      and the game object has a ``scoring`` method it will be used.

    win_score:
      Score LARGER than the largest score of game, but smaller than inf.
      The bisection is carried out inside [-win_score, win_score].

    tt:
      A transposition table (a table storing game states and moves)

    Notes
    -----

    NOTE(review): the previous docstring repeated Negamax's note about a
    ``0.01*sign*current_depth`` penalty on scores; whether the MT driver
    applies such a penalty is not visible from this class -- confirm
    against ``MTdriver.mt``.

    """

    def __init__(self, depth, scoring=None, win_score=100000, tt=None):
        self.scoring = scoring
        self.depth = depth
        self.tt = tt
        self.win_score= win_score

    def _next_bound(self, lowerbound, upperbound, bestValue, bound):
        """
        Next test bound: the middle of the current search window.

        The driver starts with the window (-inf, +inf), whose plain
        midpoint is NaN, so both ends are clamped to
        [-win_score, win_score] before bisecting. ``bestValue`` and
        ``bound`` belong to the driver's callback signature and are not
        needed here.
        """
        low = max(lowerbound, -self.win_score)
        high = min(upperbound, self.win_score)
        return (low + high) / 2.0

    def __call__(self,game):
        """
        Returns the AI's best move given the current state of the game.

        The value returned by the driver is kept in ``self.alpha``.
        """

        scoring = self.scoring if self.scoring else (
                       lambda g: g.scoring() ) # horrible hack

        first = (lambda game, tt: 0) #essence of MTDbi algorithm

        self.alpha = mtd(game,
                      first, self._next_bound,
                      self.depth,
                      scoring,
                      self.tt)

        return game.ai_move
83 changes: 83 additions & 0 deletions easyAI/AI/MTDf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#contributed by mrfesol (Tomasz Wesolowski)

from easyAI.AI.MTdriver import mtd

class MTDf:
    """
    This implements MTD-f algorithm. The following example shows
    how to setup the AI and play a Connect Four game:

    >>> from easyAI import Human_Player, AI_Player, MTDf
    >>> AI = MTDf(7)
    >>> game = ConnectFour([AI_Player(AI),Human_Player()])
    >>> game.play()

    Parameters
    -----------

    depth:
      How many moves in advance should the AI think ?
      (2 moves = 1 complete turn)

    scoring:
      A function f(game)-> score. If no scoring is provided
      and the game object has a ``scoring`` method it will be used.

    win_score:
      Score LARGER than the largest score of game, but smaller than inf.
      Stored on the instance; this class itself does not read it.

    tt:
      A transposition table (a table storing game states and moves).
      When given, its entry for the current game seeds the first guess.

    Notes
    -----

    NOTE(review): the previous docstring repeated Negamax's note about a
    ``0.01*sign*current_depth`` penalty on scores; whether the MT driver
    applies such a penalty is not visible from this class -- confirm
    against ``MTdriver.mt``.

    """

    def __init__(self, depth, scoring=None, win_score=100000, tt=None):
        self.scoring = scoring
        self.depth = depth
        self.tt = tt
        self.win_score= win_score

    @staticmethod
    def first(game, tt):
        """
        First guess of the game value.

        Uses the bounds stored for ``game`` in the transposition table:
        the midpoint when both are finite, the finite one when only one
        is, and 0 when there is no table, no entry, an entry without
        ``lowerbound``/``upperbound`` keys, or no finite bound at all.
        """
        lookup = None if (tt is None) else tt.lookup(game)
        if lookup is None:
            return 0
        try:
            lowerbound, upperbound = lookup['lowerbound'], lookup['upperbound']
        except KeyError:
            # Entry written by a search that stores no bounds
            # (e.g. value/flag records): nothing to start from.
            return 0
        inf = float('inf')
        low_known = lowerbound != -inf
        high_known = upperbound != inf
        if low_known and high_known:
            return (lowerbound+upperbound)/2.0
        # With an open window the plain midpoint would be NaN or infinite.
        if low_known:
            return lowerbound
        if high_known:
            return upperbound
        return 0

    def __call__(self,game):
        """
        Returns the AI's best move given the current state of the game.

        The value returned by the driver is kept in ``self.alpha``.
        """

        scoring = self.scoring if self.scoring else (
                       lambda g: g.scoring() ) # horrible hack

        first = MTDf.first #essence of MTDf algorithm
        # After a probe that failed low (bestValue < bound) test at
        # bestValue itself, otherwise one above it.
        next_bound = (lambda lowerbound, upperbound, bestValue, bound: bestValue
                if bestValue < bound else bestValue + 1)

        self.alpha = mtd(game,
                      first, next_bound,
                      self.depth,
                      scoring,
                      self.tt)

        return game.ai_move
78 changes: 78 additions & 0 deletions easyAI/AI/MTDstep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#contributed by mrfesol (Tomasz Wesolowski)

from easyAI.AI.MTdriver import mtd

class MTDstep:
    """
    MTD-step search. Example of setting up the AI for a Connect Four
    game:

    >>> from easyAI import Human_Player, AI_Player, MTDstep
    >>> AI = MTDstep(7)
    >>> game = ConnectFour([AI_Player(AI),Human_Player()])
    >>> game.play()

    Parameters
    -----------

    depth:
      Search depth handed to the MT driver
      (2 moves = 1 complete turn)

    scoring:
      A function f(game)-> score. When omitted, the game object's own
      ``scoring`` method is called instead.

    win_score:
      Score LARGER than the largest score of game, but smaller than inf.
      It is the first guess given to the driver.

    tt:
      A transposition table (a table storing game states and moves)

    step_size:
      Size of jump from one bound to next
    """

    def __init__(self, depth, scoring=None, win_score=100000, tt=None, step_size = 100):
        self.depth = depth
        self.scoring = scoring
        self.win_score = win_score
        self.step_size = step_size
        self.tt = tt

    def __call__(self,game):
        """
        Run the search on ``game`` and return the move it selected.

        The driver's result is saved in ``self.alpha``; the move is read
        from ``game.ai_move`` afterwards.
        """
        if self.scoring:
            evaluate = self.scoring
        else:
            evaluate = lambda g: g.scoring()

        def initial_guess(game, tt):
            # Start from the top of the score range.
            return self.win_score

        def following_bound(lowerbound, upperbound, bestValue, bound):
            # One step below the last value, but always above the
            # known lower bound.
            stepped = bestValue - self.step_size
            return max(lowerbound + 1, stepped)

        self.alpha = mtd(game, initial_guess, following_bound,
                         self.depth, evaluate, self.tt)
        return game.ai_move
4 changes: 2 additions & 2 deletions easyAI/AI/MTdriver.py
Original file line number Diff line number Diff line change
@@ -92,10 +92,10 @@ def mtd(game, first, next, depth, scoring, tt = None):
For more details read following paper:
http://arxiv.org/ftp/arxiv/papers/1404/1404.1515.pdf
"""
bound, best_value = first, first
bound, best_value = first(game, tt), first(game, tt)
lowerbound, upperbound = -inf, inf
while True:
bound = next(lowerbound, upperbound, best_value)
bound = next(lowerbound, upperbound, best_value, bound)
best_value = mt(game, bound - eps, depth, depth, scoring, tt)
if best_value < bound:
upperbound = best_value
21 changes: 12 additions & 9 deletions easyAI/AI/Negamax.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
"""

import pickle
from easyAI.games.ThreeMusketeers import MOVES

LOWERBOUND, EXACT, UPPERBOUND = -1,0,1
inf = float('infinity')
@@ -18,6 +19,10 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf,
http://en.wikipedia.org/wiki/Negamax
"""


#if tt != None:
#tt.d.num_calcs += 1

alphaOrig = alpha

# Is there a transposition table and is this game in it ?
@@ -27,6 +32,7 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf,
# The game has been visited in the past

if lookup['depth'] >= depth:
#tt.d.num_lookups += 1
flag, value = lookup['flag'], lookup['value']
if flag == EXACT:
if depth == origDepth:
@@ -60,17 +66,14 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf,
possible_moves = [lookup['move']] + possible_moves

else:

possible_moves = game.possible_moves()



state = game
best_move = possible_moves[0]
if depth == origDepth:
state.ai_move = possible_moves[0]

bestValue = -inf
best_value = -inf
unmake_move = hasattr(state, 'unmake_move')


@@ -89,7 +92,7 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf,
game.switch_player()
game.unmake_move(move)

bestValue = max( bestValue, move_alpha )
best_value = max( best_value, move_alpha )
if alpha < move_alpha :
alpha = move_alpha
best_move = move
@@ -101,12 +104,12 @@ def negamax(game, depth, origDepth, scoring, alpha=+inf, beta=-inf,
if tt != None:

assert best_move in possible_moves
tt.store(game=state, depth=depth, value = bestValue,
tt.store(game=state, depth=depth, value = best_value,
move= best_move,
flag = UPPERBOUND if (bestValue <= alphaOrig) else (
LOWERBOUND if (bestValue >= beta) else EXACT))
flag = UPPERBOUND if (best_value <= alphaOrig) else (
LOWERBOUND if (best_value >= beta) else EXACT))

return bestValue
return best_value


class Negamax:
4 changes: 2 additions & 2 deletions easyAI/AI/SSS.py
Original file line number Diff line number Diff line change
@@ -61,8 +61,8 @@ def __call__(self,game):
scoring = self.scoring if self.scoring else (
lambda g: g.scoring() ) # horrible hack

first = self.win_score #essence of SSS algorithm
next = (lambda lowerbound, upperbound, bestValue: bestValue)
first = (lambda game, tt: self.win_score) #essence of SSS algorithm
next = (lambda lowerbound, upperbound, bestValue, bound: bestValue)

self.alpha = mtd(game,
first, next,
7 changes: 6 additions & 1 deletion easyAI/AI/__init__.py
Original file line number Diff line number Diff line change
@@ -4,4 +4,9 @@
from .MTdriver import mtd
from .SSS import SSS
from .DUAL import DUAL
from .HashTT import HashTT
from .MTDbi import MTDbi
from .MTDf import MTDf
from .MTDstep import MTDstep
from .HashTT import HashTT
from .DictTT import DictTT
from .MCTS import MCTS
2 changes: 1 addition & 1 deletion easyAI/AI/solving.py
Original file line number Diff line number Diff line change
@@ -78,7 +78,7 @@ def id_solve(game, ai_depths, win_score, scoring=None,
result = (+1 if alpha>= win_score else (
-1 if alpha <= -win_score else 0))

return result, depth, game.ai_move
return result, depth, game.ai_move, tt


def df_solve(game, win_score, maxdepth=50, tt=None, depth=0):
2 changes: 1 addition & 1 deletion easyAI/__init__.py
Original file line number Diff line number Diff line change
@@ -6,5 +6,5 @@
from .AI import Negamax, id_solve, df_solve
from .AI import TT
from .AI import mtd
from .AI import SSS, DUAL
from .AI import SSS, DUAL, MTDbi, MTDf, MTDstep, MCTS
from .AI import HashTT, DictTT
27 changes: 14 additions & 13 deletions easyAI/games/Chopsticks.py
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
from copy import deepcopy
from easyAI.AI.DictTT import DictTT
from easyAI.AI.Hashes import JSWHashTT
from easyAI.AI import MTDbi

class Chopsticks( TwoPlayersGame ):
"""
@@ -82,17 +83,17 @@ def show(self):
print("Player %d: " %(i+1)),
for j in range(self.numhands):
if self.hands[i][j] > 0:
print('|'*self.hands[i][j] + '\t'),
print('|'*self.hands[i][j] + '\t',)
else:
print('x\t'),
print('x\t',)
print('')

def scoring(self):
"""
Very simple heuristic counting 'alive' hands
"""
if self.lose():
return -100
return 0
if self.win():
return 100
alive = [0] * 2
@@ -120,15 +121,15 @@ def back_to_startstate(self, move):
return hands_min == 1 and hands_max == 1

if __name__ == "__main__":
from easyAI import Negamax, AI_Player, SSS, DUAL
from easyAI import Negamax, AI_Player, SSS, DUAL, MTDbi, MTDf, MTDstep
from easyAI.AI.TT import TT
ai_algo_neg = Negamax(4)
ai_algo_sss = SSS(4)
dict_tt = DictTT(32, JSWHashTT())
ai_algo_dual = DUAL(4, tt=TT(dict_tt))
Chopsticks( [AI_Player(ai_algo_neg),AI_Player(ai_algo_dual)]).play() #first player never wins

print '-'*10
print 'Statistics of custom dictionary:'
print 'Calls of hash: ', dict_tt.num_calls
print 'Collisions: ', dict_tt.num_collisions
dict_tt = DictTT(32)
ai_algo_sss = SSS(6, tt=TT(dict_tt)) # SSS algorithm
ai_algo_neg = Negamax(6, tt=TT(dict_tt)) # Negamax algorithm
ai_algo_bi = MTDbi(6, tt=TT(dict_tt)) # MTDbi algorithm
ai_algo_f = MTDf(5, tt=TT(dict_tt)) # MTDf algorithm
ai_algo_step = MTDstep(5, tt=TT(dict_tt)) # MTDstep algorithm
ai_algo_dual = DUAL(4, tt=TT(dict_tt)) # DUAL algorithm
Chopsticks( [AI_Player(ai_algo_neg),AI_Player(ai_algo_step)]).play()
dict_tt.print_stats()
5 changes: 3 additions & 2 deletions easyAI/games/ConnectFour.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from easyAI.AI.DictTT import DictTT
from easyAI.AI.MTDbi import MTDbi
try:
import numpy as np
except ImportError:
@@ -71,10 +72,10 @@ def find_four(board, nplayer):
if __name__ == '__main__':
# LET'S PLAY !

from easyAI import Human_Player, AI_Player, Negamax, SSS, DUAL
from easyAI import Human_Player, AI_Player, Negamax, SSS, DUAL, MTDbi

ai_algo_neg = Negamax(5)
ai_algo_sss = SSS(5)
ai_algo_sss = MTDbi(5)
game = ConnectFour([AI_Player(ai_algo_neg), AI_Player(ai_algo_sss)])
game.play()
if game.lose():
23 changes: 15 additions & 8 deletions easyAI/games/Nim.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from easyAI import TwoPlayersGame
from easyAI.AI import MCTS
from easyAI.AI.MCTS import MCTS


class Nim(TwoPlayersGame):
@@ -48,21 +50,26 @@ def ttentry(self): return tuple(self.piles) #optional, speeds up AI
if __name__ == "__main__":
# IN WHAT FOLLOWS WE SOLVE THE GAME AND START A MATCH AGAINST THE AI

from easyAI import AI_Player, Human_Player, Negamax, id_solve
from easyAI import AI_Player, Human_Player, Negamax, id_solve, SSS, DictTT
from easyAI.AI import TT
# we first solve the game
w, d, m, tt = id_solve(Nim, range(5, 20), win_score = 80)
print
w, d, len(tt.d)
#w, d, m, tt = id_solve(Nim, range(5, 10), win_score = 80)
#print (w, d, len(tt.d))
# the previous line prints -1, 16 which shows that if the
# computer plays second with an AI depth of 16 (or 15) it will
# always win in 16 (total) moves or less.

# Now let's play (and lose !) against the AI
ai = Negamax(16, tt = TT())
game = Nim([Human_Player(), AI_Player(tt)])
game.play() # You will always lose this game !
print("player %d wins" % game.nplayer)
ai_negamax = Negamax(7)
ai_mcts = MCTS(20000) # 20000 iterations
ai_mcts_weak = MCTS() # 5000 iterations (default)
game = Nim([AI_Player(ai_mcts), AI_Player(ai_negamax)])
game.play()
print("player %d wins" % game.nplayer) #MCTS often wins

game = Nim([AI_Player(ai_mcts_weak), AI_Player(ai_negamax)])
game.play()
print("player %d wins" % game.nplayer) #MCTS often loses

# Note that with the transposition table tt generated by id_solve
# we can setup a perfect AI which doesn't have to think: