Gomoku(五子棋)极小化极大算法AI不关心输赢。

huangapple go评论70阅读模式
英文:

Gomoku (Connect Five) Minimax algorithm AI doesn't care about losing

问题

这是从我的之前的问题继续的。

所以,我认为我已经取得了很大的进展。现在,AI似乎通常会选择最佳的移动并争取胜利,但我遇到的最后一个问题是,它似乎不关心失败。也就是说,如果它有一个叉或者4个连在一起,它会去争取胜利,但如果HUMAN玩家有3个(或4个)连在一起,它不会采取阻止对方获胜的移动。

更奇怪的是,有时如果我将深度设置为较低的值,它会阻止失败,但如果我增加深度,它就不会了。下面是我的当前代码,其中包含一个示例棋盘,它无法找到最佳的移动(以防止下一步失败):

const ROWS = 9;
const COLS = 9;
const LEN = 5;

const EMPTY = 0;
const HUMAN = 1;
const COMP = 2;

const WINNING_MOVE = 100000;

function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
  let newChain = 0;

  while (currChain + newChain < LEN) {
    const row = sRow + (incRow * (newChain + 1));
    const col = sCol + (incCol * (newChain + 1));

    if (grid[row * COLS + col] !== who) {
      break;
    }
    
    newChain++;
  }

  return newChain;
}

function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
  let chain = 1;

  chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
  chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);

  return chain >= LEN;
}

function isWinningMove(grid, who, row, col) {    
  return lineCheck(grid, who, row, col, 1, 0) || 
         lineCheck(grid, who, row, col, 0, 1) || 
         lineCheck(grid, who, row, col, 1, 1) || 
         lineCheck(grid, who, row, col, -1, 1);
}

function getTile(grid, row, col) {
  if (row < 0 || col < 0 || row >= ROWS || col >= COLS) {
    return -1;
  }

  return grid[row * COLS + col];
}

function hasNeighbor(board, row, col) {
  if (getTile(board, row - 1, col - 1) > 0) { return true; }
  if (getTile(board, row - 1, col + 1) > 0) { return true; }
  if (getTile(board, row + 1, col - 1) > 0) { return true; }
  if (getTile(board, row + 1, col + 1) > 0) { return true; }

  if (getTile(board, row - 1, col) > 0) { return true; }
  if (getTile(board, row + 1, col) > 0) { return true; }

  if (getTile(board, row, col - 1) > 0) { return true; }
  if (getTile(board, row, col + 1) > 0) { return true; }

  return false;
}

function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
  if (depth === 0) {
    const val = evaluateBoard(board, latestRow, latestCol);

    return [ val, latestRow * COLS + latestCol ]; // 返回一对(值,移动)
  }

  const opponent = player === COMP ? HUMAN : COMP;

  // player参数应为opponent,并且根据玩家不同应返回不同的语句
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = player === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
  }

  let bestMove = -1;

  if (player === COMP) {
    let maxEval = Number.MIN_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
        board[idx] = tileValue;

        if (evaluation > maxEval) {
          maxEval = evaluation;
          bestMove = idx;
        }

        alpha = Math.max(alpha, evaluation);

        if (beta <= alpha) {
          return [ maxEval, bestMove ];
        }
      }
    }

    return [ maxEval, bestMove ];
  } else {
    let minEval = Number.MAX_SAFE_INTEGER;

    for (let row = 0; row < ROWS; row++) {
      for (let col = 0; col < COLS; col++) {
        const idx = row * COLS + col;
        const tileValue = board[idx];

        if (tileValue > 0 || !hasNeighbor(board, row, col)) { continue; }

        board[idx] = player;
        const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
        board[idx] = tileValue;

        if (evaluation < minEval) {
          minEval = evaluation;
          bestMove = idx; // 还要跟踪HUMAN的最佳移动。
        }

        beta = Math.min(beta, evaluation);

        if (beta <= alpha) {
          return [ minEval, bestMove ];
        }
      }
    }

    return [ minEval, bestMove ];
  }
}

function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
  let idx = 0;
  let score = 0;

  if (isWinningMove(grid, who, latestRow, latestCol)) {
    return WINNING_MOVE;
  }

  for (let row = 0; row < ROWS; row++) {
    for (let col = 0;

<details>
<summary>英文:</summary>

This is a continuation from [my previous question](https://stackoverflow.com/questions/76717133/gomoku-connect-five-minimax-algorithm-not-finding-obvious-win).

So, I think I&#39;ve made a lot of progress. The AI now seems to generally make the best move, and go for wins, but one last problem I&#39;m having is that it doesn&#39;t seem to care about losing. That is, if it has a fork, or 4 in a row, it will go for the win, but if the `HUMAN` player has 3 (or 4) in a row, it won&#39;t make a move that would stop them from winning.

Even weirder, sometimes if I set the depth to a lower value, it will prevent the loss, but not if I increase the depth. Here&#39;s my current code, with an example board where it fails to find the best move (to prevent the loss next turn):


&lt;!-- begin snippet: js hide: false console: true babel: false --&gt;

&lt;!-- language: lang-js --&gt;

    const ROWS = 9;
    const COLS = 9;
    const LEN = 5;

    const EMPTY = 0;
    const HUMAN = 1;
    const COMP = 2;

    const WINNING_MOVE = 100000;

    function checkDirection(grid, who, currChain, sRow, sCol, incRow, incCol) {
      let newChain = 0;

      while (currChain + newChain &lt; LEN) {
        const row = sRow + (incRow * (newChain + 1));
        const col = sCol + (incCol * (newChain + 1));

        if (grid[row * COLS + col] !== who) {
          break;
        }
        
        newChain++;
      }

      return newChain;
    }

    function lineCheck(grid, who, sRow, sCol, mRow, mCol) {
      let chain = 1;

      chain += checkDirection(grid, who, 0, sRow, sCol, mRow, mCol);
      chain += checkDirection(grid, who, chain, sRow, sCol, -mRow, -mCol);

      return chain &gt;= LEN;
    }

    function isWinningMove(grid, who, row, col) {    
      return lineCheck(grid, who, row, col, 1, 0) || 
             lineCheck(grid, who, row, col, 0, 1) || 
             lineCheck(grid, who, row, col, 1, 1) || 
             lineCheck(grid, who, row, col, -1, 1);
    }

    function getTile(grid, row, col) {
      if (row &lt; 0 || col &lt; 0 || row &gt;= ROWS || col &gt;= COLS) {
        return -1;
      }

      return grid[row * COLS + col];
    }

    function hasNeighbor(board, row, col) {
      if (getTile(board, row - 1, col - 1) &gt; 0) { return true; }
      if (getTile(board, row - 1, col + 1) &gt; 0) { return true; }
      if (getTile(board, row + 1, col - 1) &gt; 0) { return true; }
      if (getTile(board, row + 1, col + 1) &gt; 0) { return true; }

      if (getTile(board, row - 1, col) &gt; 0) { return true; }
      if (getTile(board, row + 1, col) &gt; 0) { return true; }

      if (getTile(board, row, col - 1) &gt; 0) { return true; }
      if (getTile(board, row, col + 1) &gt; 0) { return true; }

      return false;
    }

    function minimax(board, depth, alpha, beta, player, latestRow, latestCol) {
      if (depth === 0) {
        const val = evaluateBoard(board, latestRow, latestCol);

        return [ val, latestRow * COLS + latestCol ]; // returns a pair (value, move)
      }

      const opponent = player === COMP ? HUMAN : COMP;

      // player argument should be opponent, and return statement should be different per player
      if (isWinningMove(board, opponent, latestRow, latestCol)) {
        const multiplier = player === COMP ? 1 : -1;

        return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
      }

      let bestMove = -1;

      if (player === COMP) {
        let maxEval = Number.MIN_SAFE_INTEGER;

        for (let row = 0; row &lt; ROWS; row++) {
          for (let col = 0; col &lt; COLS; col++) {
            const idx = row * COLS + col;
            const tileValue = board[idx];

            if (tileValue &gt; 0 || !hasNeighbor(board, row, col)) { continue; }

            board[idx] = player;
            const evaluation = minimax(board, depth - 1, alpha, beta, HUMAN, row, col)[0];
            board[idx] = tileValue;

            if (evaluation &gt; maxEval) {
              maxEval = evaluation;
              bestMove = idx;
            }

            alpha = Math.max(alpha, evaluation);

            if (beta &lt;= alpha) {
              return [ maxEval, bestMove ];
            }
          }
        }

        return [ maxEval, bestMove ];
      } else {
        let minEval = Number.MAX_SAFE_INTEGER;

        for (let row = 0; row &lt; ROWS; row++) {
          for (let col = 0; col &lt; COLS; col++) {
            const idx = row * COLS + col;
            const tileValue = board[idx];

            if (tileValue &gt; 0 || !hasNeighbor(board, row, col)) { continue; }

            board[idx] = player;
            const evaluation = minimax(board, depth - 1, alpha, beta, COMP, row, col)[0];
            board[idx] = tileValue;

            if (evaluation &lt; minEval) {
              minEval = evaluation;
              bestMove = idx; // Also track best move for HUMAN.
            }

            beta = Math.min(beta, evaluation);

            if (beta &lt;= alpha) {
              return [ minEval, bestMove ];
            }
          }
        }

        return [ minEval, bestMove ];
      }
    }

    function evaluatePlayerBoard(grid, who, latestRow, latestCol) {
      let idx = 0;
      let score = 0;

      if (isWinningMove(grid, who, latestRow, latestCol)) {
        return WINNING_MOVE;
      }

      for (let row = 0; row &lt; ROWS; row++) {
        for (let col = 0; col &lt; COLS; col++) {
          if (grid[idx] !== who) { idx++; continue; }

          if (getTile(grid, row - 1, col - 1) === who) { score++; }
          if (getTile(grid, row - 1, col + 1) === who) { score++; }
          if (getTile(grid, row + 1, col - 1) === who) { score++; }
          if (getTile(grid, row + 1, col + 1) === who) { score++; }

          if (getTile(grid, row - 1, col) === who) { score++; }
          if (getTile(grid, row + 1, col) === who) { score++; }

          if (getTile(grid, row, col - 1) === who) { score++; }
          if (getTile(grid, row, col + 1) === who) { score++; }
          
          // if (getTile(grid, row, col) === who) { score++; }

          idx++;
        } 
      }

      return score;
    }

    function evaluateBoard(grid, latestRow, latestCol) {
      return evaluatePlayerBoard(grid, COMP, latestRow, latestCol) // COMP is maximizing
           - evaluatePlayerBoard(grid, HUMAN, latestRow, latestCol); // HUMAN is minimizing
    }

    function getBestMove(board, maxDepth) {
      for (let depth = 1; depth &lt;= maxDepth; depth++) {
        const [ evaluation, move ] = minimax(board, depth, Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER, COMP, -1, -1);

        // if we found a winning move already, return early
        // otherwise, keep iterating until we reach max depth
        if (evaluation &gt; 10000 || depth === maxDepth) {
          return move;
        }
      }

      return 0; // should never run
    }

    const exampleBoard = [
      0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-8
      0, 0, 0, 0, 0, 0, 0, 0, 0, // 9-17
      0, 0, 0, 0, 0, 0, 2, 0, 0, // 18-26
      0, 0, 2, 2, 0, 1, 0, 0, 0, // 27-35
      0, 0, 0, 2, 1, 0, 0, 0, 0, // 36-44
      0, 0, 0, 1, 0, 0, 0, 0, 0, // 45-53
      0, 0, 1, 0, 0, 0, 0, 0, 0, // 54-62
      0, 0, 0, 0, 0, 0, 0, 0, 0, // 63-71
      0, 0, 0, 0, 0, 0, 0, 0, 0, // 72-80
    ];

    console.log(getBestMove(exampleBoard, 3));

&lt;!-- end snippet --&gt;

I believe it should be logging `64` (to prevent the loss next turn), but it is instead logging `20`

</details>


# 答案1
**得分**: 1

在代码的这部分存在逻辑错误

```javascript
  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = player === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
  }

当进入这个代码块时,我们知道获胜的是 opponent,而不是 player,因此乘数应该基于 opponent。如果 opponent 是最大化的玩家,应该乘以 1,否则乘以 -1。

虽然这不是一个问题,但将 latestRow * COLS + latestCol 作为最佳移动返回是没有意义的,因为当前玩家没有最佳移动(他们输掉了比赛),而 latestRow * COLS + latestCol 显然不是他们的移动,因此它是不相关的。(对于 depth == 0 块也可以做同样的评论)

修复如下:

  // player argument should be opponent, and return statement should be different per player
  if (isWinningMove(board, opponent, latestRow, latestCol)) {
    const multiplier = opponent === COMP ? 1 : -1;

    return [ WINNING_MOVE * multiplier, -1 ];
  }
英文:

You have a logical error in this part of the code:

  // player argument should be opponent, and return statement should be different per player
if (isWinningMove(board, opponent, latestRow, latestCol)) {
const multiplier = player === COMP ? 1 : -1;
return [ WINNING_MOVE * multiplier, latestRow * COLS + latestCol ];
}

When this block is entered, we know that it was the opponent that won, not player, and so the multiplier should be based on opponent. If the opponent is the maximizing player, we should multiply with 1, otherwise with -1.

Not an issue, but it makes no sense to return latestRow * COLS + latestCol as the best move, because the current player has no best move (they lost the game), and latestRow * COLS + latestCol is certainly not their move, so it is irrelevant. (The same remark can be made for the depth == 0 block)

Fix:

  // player argument should be opponent, and return statement should be different per player
if (isWinningMove(board, opponent, latestRow, latestCol)) {
const multiplier = opponent === COMP ? 1 : -1;
return [ WINNING_MOVE * multiplier, -1 ];
}

huangapple
  • 本文由 发表于 2023年7月20日 20:31:28
  • 转载请务必保留本文链接:https://go.coder-hub.com/76729908.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定