
AlphaBeta Pruning TicTacToe 不阻塞,是 eval 问题吗?

我已经调试了好几天了,我不知道我在 tic-tac-toe 游戏和 AI 的这段代码中做错了什么(我知道它不是真正的 AI 但...)我选择了因为这是 Alpha-Beta 修剪。它的 7x7 板子对于纯粹的极小极大实现来说太重了。

我的问题是我不明白为什么 Alpha-Beta 不会阻止玩家拖延游戏并等待玩家移动并使用对他有利的正确移动,或者只是简单地将游戏平局。

我决定棋盘中央的分数(最终得分)比棋盘边缘的分数多。我相信更多地向中心移动将比边缘移动更多的成功机会,这就是为什么我创建了 AddscoreToMove 函数来评估该移动。 为了确保 eval 函数会检查棋盘上的每一个可能的移动,我没有让该函数像 find first xxx 那样工作(例如在 row0 和 col0、col1、col2 处)并返回(因为可能有 4X 或 4O)。此外,4X 或 4O 的得分明显高于其他得分,应视为胜利。

谁能告诉我我做错了什么?这是我使用 AI 的第二个程序,第一个是在 3x3 板上使用 minimax 运行良好。

C# 代码如下


Public Class Form1
    Dim board As Char(,) = {
            {" "," "," "},{" "," "}}
    Class Move
        Public row,col As Integer
    End Class
    Dim BestMoveRow As Integer = 0
    Dim BestMoveCol As Integer = 0
    Dim BestMovescore As Integer = 0

    Shared player As Char = "X",opponent As Char = "O"

    Shared Function AddscoreToMove(thatMove As Move) As Integer
        Dim row As Integer = thatMove.row
        Dim col As Integer = thatMove.col
        '0 score,move is at border 
        If ((row >= 1 And row <= 5) And col = 0) Then
            Return 0
        ElseIf ((row >= 1 And row <= 5) And col = 6) Then
            Return 0
        ElseIf (row = 0 And (col >= 0 And col <= 6)) Then
            Return 0
        ElseIf (row = 6 And (col >= 0 And col <= 6)) Then
            Return 0
        End If

        '1 score,thatMove is at border +1
        If ((row >= 2 And row <= 4) And col = 1) Then
            Return 1
        ElseIf ((row >= 2 And row <= 4) And col = 5) Then
            Return 1
        ElseIf (row = 1 And (col >= 1 And col <= 5)) Then
            Return 1
        ElseIf (row = 5 And (col >= 1 And col <= 5)) Then
            Return 1
        End If

        '2 score,thatMove is at border +2
        If (row = 2 And col = 2) Then
            Return 2
        ElseIf (row = 2 And col = 4) Then
            Return 2
        ElseIf (row = 2 And (col >= 2 And col <= 4)) Then
            Return 2
        ElseIf (row = 4 And (col >= 2 And col <= 4)) Then
            Return 2
        End If

        '3 Center thatMove 
        If (row = 3 And col <= 3) Then
            Return 3
        End If

        Return 0 'error not added lane

    End Function

    Private Shared Function eval(ByVal b As Char(,)) As Integer

        Dim playerscorerow As Integer = 0
        Dim playerscorecol As Integer = 0
        Dim playerscorecross As Integer = 0
        Dim pcscorerow As Integer = 0
        Dim pcscorecol As Integer = 0
        Dim pcscorecross As Integer = 0
        ''EVALUATE rows 
        For row As Integer = 0 To 3
            For col As Integer = 0 To 6
            'initialize moves to evaluate
                Dim move3 As New Move With {
                                        .row = row + 3,.col = col
                Dim move2 As New Move With {
                                        .row = row + 2,.col = col
                Dim move1 As New Move With {
                                        .row = row + 1,.col = col
                Dim move0 As New Move With {
                                        .row = row,.col = col
                If Not b(row,col) = " " Then 'ITS NOT EMPTY - PLAYER OR PC MOVED HERE
                    Dim movescore As Integer = AddscoreToMove(move0) 'EVALUATE THAT MOVE 
                    If b(row,col) = b(row + 1,col) Then 'THERE IS 2 X or 2 O
                        Dim move1score As Integer = AddscoreToMove(move1)
                        If b(row + 1,col) = b(row + 2,col) Then 'THERE IS 3x or 3O
                            Dim move2score As Integer = AddscoreToMove(move2)
                            If b(row + 2,col) = b(row + 3,col) Then 'THERE IS 4X or 4O
                                Dim move3score As Integer = AddscoreToMove(move3)
                                If b(row,col) = player Then 'PLAYER HAVE 4X HERE
                                    playerscorerow = Math.Max(playerscorerow,100 + move3score + move2score + move1score + movescore) 'GET HIGHEST OF ALL EVALUATIONS OF THAT FOR LOOPS
                                ElseIf b(row,col) = opponent Then 'PC HAVE 4O HERE
                                    pcscorerow = Math.Min(pcscorerow,-100 - move3score - move2score - move1score - movescore)
                                End If
                            End If
                            If b(row,col) = player Then
                                playerscorerow = Math.Max(playerscorerow,5 + move2score + move1score + movescore)
                            ElseIf b(row,col) = opponent Then
                                pcscorerow = Math.Min(pcscorerow,-5 - move2score - move1score - movescore)
                            End If
                        End If
                        If b(row,col) = player Then
                            playerscorerow = Math.Max(playerscorerow,2 + move1score + movescore)
                        ElseIf b(row,col) = opponent Then
                            pcscorerow = Math.Min(pcscorerow,-2 - move1score - movescore)
                        End If
                    End If
                    If b(row,col) = player Then
                        playerscorerow = Math.Max(playerscorerow,movescore)
                    ElseIf b(row,col) = opponent Then
                        pcscorerow = Math.Min(pcscorerow,-movescore)
                    End If
                End If

        ''col win
        For row As Integer = 0 To 6
            For col As Integer = 0 To 3
                Dim move3 As New Move With {
                                        .row = row + 3,.col = col
                If Not b(row,col) = " " Then
                    Dim movescore As Integer = AddscoreToMove(move0)
                    If b(row,col) = b(row,col + 1) Then
                        Dim movescore1 As Integer = AddscoreToMove(move1)
                        If b(row,col + 1) = b(row,col + 2) Then
                            Dim movescore2 As Integer = AddscoreToMove(move2)
                            If b(row,col + 2) = b(row,col + 3) Then
                                Dim movescore3 As Integer = AddscoreToMove(move3)
                                If b(row,col) = player Then
                                    playerscorerow = Math.Max(playerscorerow,100 + movescore3 + movescore2 + movescore1 + movescore)
                                ElseIf b(row,col) = opponent Then
                                    pcscorerow = Math.Min(pcscorerow,-100 - movescore3 - movescore2 - movescore1 - movescore)
                                End If
                            End If
                            If b(row,5 + movescore2 + movescore1 + movescore)
                            ElseIf b(row,-5 - movescore2 - movescore1 - movescore)
                            End If
                        End If
                        If b(row,2 + movescore1 + movescore)
                        ElseIf b(row,-2 - movescore1 - movescore)
                        End If
                    End If
                    If b(row,-movescore)
                    End If
                End If

        'cross win
        For row As Integer = 0 To 3
            For col As Integer = 0 To 3
                If Not b(row,col) = " " Then
                    If (b(row,col + 1) AndAlso b(row + 1,col + 1) = b(row + 2,col + 2) AndAlso b(row + 2,col + 2) = b(row + 3,col + 3)) Then
                        If b(row,col) = player Then
                            Return +10
                        ElseIf b(row,col) = opponent Then
                            Return -10
                        End If
                    End If
                End If

        'cross win
        For row As Integer = 0 To 3
            For col As Integer = 3 To 6
                If Not b(row,col - 1) AndAlso b(row + 1,col - 1) = b(row + 2,col - 2) AndAlso b(row + 2,col - 2) = b(row + 3,col - 3)) Then
                        If b(row,col) = opponent Then
                            Return -10
                        End If
                    End If
                End If

        Dim scoreValues() As Integer = {playerscorerow,playerscorecol,playerscorecross,pcscorerow,pcscorecol,pcscorecross}
        Dim max = scoreValues.OrderByDescending(Function(z) Math.Abs(z)).FirstOrDefault()
        Return max
    End Function

    Private Shared Function MiniMax(ByVal board As Char(,),ByVal machineMove As Boolean,ByVal depth As Integer) As Integer
        Const alpha As Integer = -10_000
        Const beta As Integer = 10_000
        Return AlphaBetaPruning(board,machineMove,depth,beta,alpha)

    End Function

    Private Shared Function AlphaBetaPruning(ByVal board As Char(,ByVal depth As Integer,ByVal beta As Integer,ByVal alpha As Integer) As Integer
        If depth = 0 Then Return eval(board)
        If machineMove Then 'min PC MOVE
            For i As Integer = 0 To 6
                For j As Integer = 0 To 6
                    If board(i,j) = " " Then
                        board(i,j) = opponent
                        Dim score As Integer = Math.Min(AlphaBetaPruning(board,Not machineMove,depth - 1,alpha),eval(board))
                        board(i,j) = " "
                        If score < beta Then
                            Form1.BestMoveRow = i
                            Form1.BestMoveCol = j
                            Form1.BestMovescore = score
                            beta = score
                        End If
                        If alpha >= beta Then Exit For 'cutoff
                    End If
            Return beta
        Else 'max PLAYER MOVE
            For i As Integer = 0 To 6
                For j As Integer = 0 To 6
                    If board(i,j) = player
                        Dim score As Integer = Math.Max(AlphaBetaPruning(board,j) = " "
                        If score > alpha Then
                            alpha = score
                        End If
                        If alpha >= beta Then Exit For
                    End If
            Return alpha
        End If
    End Function
End Class

C# 代码

public class Form1
    private char[,] board = new[] { { " "," " },{ " "," " } };
    class Move
        public int row,col;

    private int BestMoveRow = 0;
    private int BestMoveCol = 0;
    private int BestMovescore = 0;

    private static char player = "X";
    private static char opponent = "O";

    public static int AddscoreToMove(Move thatMove)
        int row = thatMove.row;
        int col = thatMove.col;
        // 0 score,move is at border 
        if (((row >= 1 & row <= 5) & col == 0))
            return 0;
        else if (((row >= 1 & row <= 5) & col == 6))
            return 0;
        else if ((row == 0 & (col >= 0 & col <= 6)))
            return 0;
        else if ((row == 6 & (col >= 0 & col <= 6)))
            return 0;

        // 1 score,thatMove is at border +1
        if (((row >= 2 & row <= 4) & col == 1))
            return 1;
        else if (((row >= 2 & row <= 4) & col == 5))
            return 1;
        else if ((row == 1 & (col >= 1 & col <= 5)))
            return 1;
        else if ((row == 5 & (col >= 1 & col <= 5)))
            return 1;

        // 2 score,thatMove is at border +2
        if ((row == 2 & col == 2))
            return 2;
        else if ((row == 2 & col == 4))
            return 2;
        else if ((row == 2 & (col >= 2 & col <= 4)))
            return 2;
        else if ((row == 4 & (col >= 2 & col <= 4)))
            return 2;

        // 3 Center thatMove 
        if ((row == 3 & col <= 3))
            return 3;

        return 0; // error not added lane

    private static int eval(char[,] b)
        int playerscorerow = 0;
        int playerscorecol = 0;
        int playerscorecross = 0;
        int pcscorerow = 0;
        int pcscorecol = 0;
        int pcscorecross = 0;

        // 'EVALUATE rows 
        for (int row = 0; row <= 3; row++)
            for (int col = 0; col <= 6; coL++)
                // initialize moves to evaluate
                Move move3 = new Move()
                    row = row + 3,col = col
                Move move2 = new Move()
                    row = row + 2,col = col
                Move move1 = new Move()
                    row = row + 1,col = col
                Move move0 = new Move()
                    row = row,col = col

                if (!b[row,col] == " ")
                    int movescore = AddscoreToMove(move0); // EVALUATE THAT MOVE 
                    if (b[row,col] == b[row + 1,col])
                        int move1score = AddscoreToMove(move1);
                        if (b[row + 1,col] == b[row + 2,col])
                            int move2score = AddscoreToMove(move2);
                            if (b[row + 2,col] == b[row + 3,col])
                                int move3score = AddscoreToMove(move3);
                                if (b[row,col] == player)
                                    playerscorerow = Math.Max(playerscorerow,100 + move3score + move2score + move1score + movescore); // GET HIGHEST OF ALL EVALUATIONS OF THAT FOR LOOPS
                                else if (b[row,col] == opponent)
                                    pcscorerow = Math.Min(pcscorerow,-100 - move3score - move2score - move1score - movescore);
                            if (b[row,col] == player)
                                playerscorerow = Math.Max(playerscorerow,5 + move2score + move1score + movescore);
                            else if (b[row,col] == opponent)
                                pcscorerow = Math.Min(pcscorerow,-5 - move2score - move1score - movescore);
                        if (b[row,col] == player)
                            playerscorerow = Math.Max(playerscorerow,2 + move1score + movescore);
                        else if (b[row,col] == opponent)
                            pcscorerow = Math.Min(pcscorerow,-2 - move1score - movescore);
                    if (b[row,col] == player)
                        playerscorerow = Math.Max(playerscorerow,movescore);
                    else if (b[row,col] == opponent)
                        pcscorerow = Math.Min(pcscorerow,-movescore);

        // 'col win
        for (int row = 0; row <= 6; row++)
            for (int col = 0; col <= 3; coL++)
                Move move3 = new Move()
                    row = row + 3,col = col
                if (!b[row,col] == " ")
                    int movescore = AddscoreToMove(move0);
                    if (b[row,col] == b[row,col + 1])
                        int movescore1 = AddscoreToMove(move1);
                        if (b[row,col + 1] == b[row,col + 2])
                            int movescore2 = AddscoreToMove(move2);
                            if (b[row,col + 2] == b[row,col + 3])
                                int movescore3 = AddscoreToMove(move3);
                                if (b[row,100 + movescore3 + movescore2 + movescore1 + movescore);
                                else if (b[row,-100 - movescore3 - movescore2 - movescore1 - movescore);
                            if (b[row,5 + movescore2 + movescore1 + movescore);
                            else if (b[row,-5 - movescore2 - movescore1 - movescore);
                        if (b[row,2 + movescore1 + movescore);
                        else if (b[row,-2 - movescore1 - movescore);
                    if (b[row,-movescore);

        // cross win
        for (int row = 0; row <= 3; row++)
            for (int col = 0; col <= 3; coL++)
                if (!b[row,col] == " ")
                    if ((b[row,col + 1] && b[row + 1,col + 1] == b[row + 2,col + 2] && b[row + 2,col + 2] == b[row + 3,col + 3]))
                        if (b[row,col] == player)
                            return +10;
                        else if (b[row,col] == opponent)
                            return -10;

        // cross win
        for (int row = 0; row <= 3; row++)
            for (int col = 3; col <= 6; coL++)
                if (!b[row,col - 1] && b[row + 1,col - 1] == b[row + 2,col - 2] && b[row + 2,col - 2] == b[row + 3,col - 3]))
                        if (b[row,col] == opponent)
                            return -10;

        int[] scoreValues = new[] { playerscorerow,pcscorecross };
        var max = scoreValues.OrderByDescending(z => Math.Abs(z)).FirstOrDefault();

        return max;

    private static int MiniMax(char[,] board,bool machineMove,int depth)
        const int alpha = -10_000;
        const int beta = 10_000;
        return AlphaBetaPruning(board,alpha);

    private static int AlphaBetaPruning(char[,int depth,int beta,int alpha)
        if (depth == 0)
            return eval(board);
        if (machineMove)
            for (int i = 0; i <= 6; i++)
                for (int j = 0; j <= 6; j++)
                    if (board[i,j] == " ")
                        board[i,j] = opponent;
                        int score = Math.Min(AlphaBetaPruning(board,!machineMove,eval(board));
                        board[i,j] = " ";
                        if (score < beta)
                            Form1.BestMoveRow = i;
                            Form1.BestMoveCol = j;
                            Form1.BestMovescore = score;
                            beta = score;
                        if (alpha >= beta)
                            break; // cutoff
            return beta;
            for (int i = 0; i <= 6; i++)
                for (int j = 0; j <= 6; j++)
                    if (board[i,j] = player;
                        int score = Math.Max(AlphaBetaPruning(board,j] = " ";
                        if (score > alpha)
                            alpha = score;
                        if (alpha >= beta)
            return alpha;


经过一天的调试后,我有点迷茫,但我想出了如何绕过它,这可能是解决此问题的真正方法。 AlphaBeta 只专注于获胜。这实际上与 eval 函数有关,如果我们在 eval 函数中考虑更多因素,那么该函数会更好。 这就是为什么我们有

  1. 基本获胜因素 -> 即评估赢得比赛的动作
  2. 阻止敌人因素 -> 拖延游戏
  3. 还有我尚未实施的分叉因素。 信息在这里: https://en.wikipedia.org/wiki/Tic-tac-toe#Strategy 并从 https://stackoverflow.com/a/19406575/9283016

简而言之 - 简单的 AlphaBeta 函数与(仅)获胜的简单评估不考虑游戏时间。 我们必须编写适当的阻塞和分叉函数。

Private Shared Function evalBlock(ByVal b As Char(,)) As Move
    Dim blockingMove As New Move With {
         .row = -1,.col = -1

    ''row block
    For row As Integer = 0 To 4
        For col As Integer = 0 To 6
            If Not b(row,col) = " " Then
                If b(row,col) = b(row + 1,col) Then '2 X or 2 O
                    If b(row,col) = player Then
                        If b(row + 2,col) = " " Then
                            blockingMove.row = row + 2
                            blockingMove.col = col
                            Return blockingMove
                        End If
                        If row > 0 Then
                            If b(row - 1,col) = " " Then
                                blockingMove.row = row - 1
                                blockingMove.col = col
                                Return blockingMove
                            End If
                        End If

                    End If
                End If
            End If

    ''col block
    For row As Integer = 0 To 6
        For col As Integer = 0 To 4
            If Not b(row,col) = b(row,col + 1) Then '2 X or 2 O
                    If b(row,col) = player Then
                        If b(row,col + 2) = " " Then
                            blockingMove.row = row
                            blockingMove.col = col + 2
                            Return blockingMove
                        End If
                        If col > 1 Then
                            If b(row,col - 1) = " " Then
                                blockingMove.row = row
                                blockingMove.col = col - 1
                                Return blockingMove
                            End If
                        End If
                    End If
                End If
            End If

    '\ cross block
    For row As Integer = 0 To 4
        For col As Integer = 0 To 4
            If Not b(row,col) = " " Then
                If (b(row,col + 1)) Then
                    If b(row,col + 2) = " " Then
                            blockingMove.row = row + 2
                            blockingMove.col = col + 2
                        End If

                        If (row > 0 And col > 0) Then
                            If b(row - 1,col - 1) = " " Then
                                blockingMove.row = row - 1
                                blockingMove.col = col - 1
                            End If
                        End If
                    End If
                End If
            End If

    '/ cross block
    For row As Integer = 0 To 4
        For col As Integer = 2 To 6
            If Not b(row,col - 1)) Then
                    If b(row,col) = " " Then
                            blockingMove.row = row + 2
                            blockingMove.col = col - 2
                        End If
                    End If
                End If
            End If

    blockingMove.row = -1
    blockingMove.col = -1
    Return blockingMove

End Function

当然返回阻挡移动什么也不做。在 AlphaBeta 函数中,我们必须为此分配适当的值。所以我发现我们的 AI 会阻止玩家,如果他做了 2 个获胜的动作,并且阻止比获胜更可取。同样,在 2 人移动后阻止比在 3 人移动后阻止更有意义,因为如果我们使用 4 步获胜的规则进行游戏可能会为时已晚。


                Dim score As Integer = Math.Min(AlphaBetaPruning(board,Not machineMove,depth - 1,beta,alpha),eval(board) + depth)
                Dim BlockingMove As Move = evalBlock(board)
                If BlockingMove.row <> -1 Then
                    Dim blockingMoveScore As Integer = 5000
                    If score < blockingMoveScore Then
                        Form1.BestMoveRow = BlockingMove.row
                        Form1.BestMoveCol = BlockingMove.col
                        Form1.BestMoveScore = score
                    End If
                End If

这就是我在使用 AI 的第一场比赛后输掉比赛的原因。 enter image description here

