在Python中为井字游戏（Tic-Tac-Toe）实现Minimax算法

如何解决在Python中为井字游戏（Tic-Tac-Toe）实现Minimax算法时遇到的问题

我正在尝试制作井字游戏AI，它通过使用minimax算法来以最优方式下棋。我注意到它并没有走出最优的着法，而且让它自己与自己对弈时，结果总是'X'玩家获胜（本应以平局告终）。这是我的算法代码：

def _minimax_outcome(state):
    """Return the score of a finished game, or None while play continues.

    The score is from the AI's ('O') point of view: 1 when 'O' has won,
    -1 when 'X' has won and 0 for a draw, as reported by
    ``check_current_state``.
    """
    winner_loser, done = check_current_state(state)
    if done == "Done" and winner_loser == 'O':  # AI won
        return 1
    if done == "Done" and winner_loser == 'X':  # Human won
        return -1
    if done == "Draw":
        return 0
    return None


def _minimax_empty_cells(state):
    """Return the 1-based, row-major numbers (1..9) of the blank cells."""
    # Compare with ==, not `is`: identity of equal strings is not guaranteed.
    return [
        i * 3 + (j + 1)
        for i in range(3)
        for j in range(3)
        if state[i][j] == ' '
    ]


def _minimax_score(state, player):
    """Return the minimax value of ``state`` with ``player`` to move.

    'O' maximises the score and any other player minimises it. Unlike
    ``getBestMove`` this always returns a score, never a cell number, so
    the values compared while searching are all on the same scale.
    """
    outcome = _minimax_outcome(state)
    if outcome is not None:
        return outcome

    opponent = 'X' if player == 'O' else 'O'
    scores = []
    for cell in _minimax_empty_cells(state):
        # Play on a copy so sibling branches still see the original board.
        new_state = copy_game_state(state)
        play_move(new_state, player, cell)
        scores.append(_minimax_score(new_state, opponent))

    if not scores:
        # No blank cell although no result was reported: score it as a draw.
        return 0
    return max(scores) if player == 'O' else min(scores)


def getBestMove(state,player):
    '''
    Minimax Algorithm

    Return the best cell for ``player`` to play on ``state``.

    ``state`` is a 3x3 board indexed as ``state[row][col]`` in which a
    blank cell holds ' '. Cells are numbered 1..9 in row-major order.
    'O' (the AI) is the maximising player; any other player is treated as
    the minimising one.

    Returns the number of the best cell, or None when there is no blank
    cell. If the game is already over, the terminal score (1, -1 or 0) is
    returned instead, as before, so existing callers keep working.

    Every candidate move is scored by ``_minimax_score``. Scoring the
    moves by calling this function recursively would mix cell numbers
    with scores, because this function returns a cell number for
    unfinished games.
    '''
    outcome = _minimax_outcome(state)
    if outcome is not None:
        return outcome

    opponent = 'X' if player == 'O' else 'O'
    best_move = None
    best_score = None
    for cell in _minimax_empty_cells(state):
        new_state = copy_game_state(state)
        play_move(new_state, player, cell)
        score = _minimax_score(new_state, opponent)

        if best_score is None:
            improved = True
        elif player == 'O':
            improved = score > best_score
        else:
            improved = score < best_score
        # Strict comparison: among equally good moves the first one wins.
        if improved:
            best_score = score
            best_move = cell

    return best_move

在这里我该怎么办？

解决方法

我认为，如果遵循标准的minimax算法（例如此处链接的维基百科页面上的版本），会更容易。我也建议添加alpha-beta修剪以使其速度更快，即使在Tic Tac Toe中并不是必需的。下面是我很久以前写的一个游戏中的示例，您可以用它来启发思路；它基本上全部取自链接的维基百科页面，并进行了一些小调整，例如用 if beta <= alpha 进行alpha-beta修剪：

move,evaluation = minimax(board,8,-math.inf,math.inf,True)

def minimax(board,depth,alpha,beta,maximizing_player):
    if depth == 0 or board.is_winner() or board.is_board_full():
        return None,evaluate(board)

    children = board.get_possible_moves(board)
    best_move = children[0]

    if maximizing_player:
        max_eval = -math.inf
        for child in children:
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'O'
            current_eval = minimax(board_copy,depth - 1,alpha,beta,False)[1]
            if current_eval > max_eval:
                max_eval = current_eval
                best_move = child
            alpha = max(alpha,current_eval)
            if beta <= alpha:
                break
        return best_move,max_eval
    else:
        min_eval = math.inf
        for child in children:
            board_copy = copy.deepcopy(board)
            board_copy.board[child[0]][child[1]].player = 'X'
            current_eval = minimax(board_copy,depth - 1,alpha,beta,True)[1]
            if current_eval < min_eval:
                min_eval = current_eval
                best_move = child
            beta = min(beta,min_eval)
            if beta <= alpha:
                break
        return best_move,min_eval

def evaluate(board):
    if board.is_winner('X'):
        return -1
    if board.is_winner('O'):
        return 1
    return 0

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

/* Capacity in bytes of the text buffer carried by one message. */
#define MAXLINE 1024

/*
 * One message as handed to msgsnd(): a long message type followed by the
 * message text.
 */
struct my_msgbuf {
        long mtype;             /* message type; main() sets it to 1 */
        char mtext[MAXLINE];    /* message text, filled by fgets() in main() */
};
/*
 * Read lines from stdin and send each one as a message of type 1 on a
 * System V message queue, then remove the queue once stdin reaches EOF.
 *
 * Returns 0 on success. Exits with status 1 if the key cannot be derived,
 * the queue cannot be obtained, or the queue cannot be removed. A failed
 * msgsnd() is reported with perror() and the loop continues with the
 * next line.
 */
int main(void)
{
    struct my_msgbuf buf;
    int msqid;
    key_t key;

    /* Derive the queue key from the path "client.c" and project id 'B'. */
    if ((key = ftok("client.c",'B')) == -1) {
        perror("ftok");
        exit(1);
    }

    /* Open the queue for that key, creating it with mode 0644 if needed. */
    if ((msqid = msgget(key,0644 | IPC_CREAT)) == -1) {
        perror("msgget");
        exit(1);
    }

    printf("Write a text:\n");

    buf.mtype = 1;
    while (fgets(buf.mtext, MAXLINE, stdin) != NULL) {
        /*
         * msgsz is the size of the text only; it must not count the
         * leading mtype field. Passing sizeof(buf) asked msgsnd() to read
         * sizeof(long) bytes past the end of buf. Send the line plus its
         * terminating NUL instead.
         */
        size_t len = strlen(buf.mtext) + 1;

        if (msgsnd(msqid, &buf, len, 0) == -1)
            perror("msgsnd");
    }

    /* Input is exhausted: remove the queue. */
    if (msgctl(msqid,IPC_RMID,NULL) == -1) {
        perror("msgctl");
        exit(1);
    }

    return 0;
}

请注意，对棋盘进行深拷贝（或者在递归调用minimax之后撤销刚才走的那一步）非常重要，否则您将更改原始棋盘的状态，并且会得到一些奇怪的行为。