Tic Tac ToeとMinimax - マイクロコントローラで不完全なAIを作成する

私は完璧なAI（完全な意味で失うことはありません）を含め、マイクロコントローラでTic-Tac-Toeゲームを作成しました。私はミニマックスアルゴリズムを使用していませんでしたが、すべての可能な最適な動きを持つちょっとしたステートマシンでした。私の問題は、今はさまざまな困難（簡単、中、難）を実装したいということです。これまでのAIは難しいものでした。これを行う方法を考えて、minimaxアルゴリズムを使いたいと思ってしまいましたが、すべてのゲームポジションのすべての得点を計算するようにしました。最高の。私はいつもマイクロコントローラそのものでこれらの計算をすべて行うことができないので、コンピュータ上で実行できる小さなプログラムを作りたいと思っていました。これは、すべてのボード状態のアレイを提供します（対称性に関しては、使用された）とそれらのスコアによる。これについては、まず各状態のscoresを正しく計算するために、depthに関してミニマックスアルゴリズムそのものを実装しようとしました。それから、配列の中で（今のところ）全ての最適な動きを私に返すことになっていました。しかし、それはうまく動作していないようです。私はいくつかのprintf行でデバッグしようとしました。Tic Tac ToeとMinimax - マイクロコントローラで不完全なAIを作成する

static int minimax(int *board, int depth) 
{ 
    int score; 
    int move = -1; 
    int scores[9]; 
    int nextDepth; 

    printf("\n----- Called Minimax, Depth: %i -----\n\n", depth); 

    if(depth%2 ==1){ 
     player = -1; 
    } else { 
     player = 1; 
    } 

    printf("Player: %i\n---\n", player); 

    if(isWin(board) != 0){ 
     score = (10-depth)*winningPlayer; 

     printf("Player %i won on depth %i\n", winningPlayer, depth); 
     printf("Resulting score: (10-%i)*%i = %i\nScore returned to depth %i\n---\n", depth, winningPlayer, score, depth-1); 

     return score; 
    } 

    score = -20; 
    nextDepth = depth+1; 

    printf("Next depth is %i\n---\n", nextDepth); 

    int i; 
    for(i=0; i<9; i++){ 
     if(board[i] == 0) { 

      if(nextDepth%2 ==0) { 
       player = -1; 
      } else { 
       player = 1; 
      } 

      printf("Found vacant space at position %i\n", i); 
      printf("Set value of position %i to %i\n---\n", i, player); 

      board[i] = player; 
      int thisScore = minimax(board, nextDepth); 

      printf("Value of the move at position %i on next depth %i is %i\n---\n", i, nextDepth, thisScore); 

      scores[i] = thisScore; 
      if(thisScore > score){ 

       printf("New score value is greater than the old one: %i < %i\n---\n", thisScore, score); 

       score = thisScore; 
       move = i; 
       g_moves[nextDepth-1] = move; 

       printf("Score was set to %i\n", thisScore); 
       printf("Remembered move %i\n---\n", move); 

      } 
      board[i] = 0; 

      printf("Value of position %i was reset to 0 on next depth %i\n---\n", i, nextDepth); 

     } 
    } 

    if(move == -1) { 

     printf("Game ended in a draw.\n Returned score: 0\n---\n"); 

     return 0; 
    } 

    printf("Move at position %i was selected on next depth %i\n", move, nextDepth); 
    printf("Returning score of %i to depth %i\n---\n", score, depth); 


    return score; 
}

mainは、次のとおりです：ここでのコードは、これまでの両方minimax機能のだけでなく、私の主な機能です

//1 = Beginning Player 
//-1 = second Player 
static int player; 
static int winningPlayer = 0; 
static int g_moves[9]; 

/* 0 1 2 
* 3 4 5 
* 6 7 8 
*/ 
int initBoard[9] = { 
    0, 0, 0, 
    0, 0, 0, 
    0, 0, 0, 
}; 

int board[9];

：さらに

int main(int argc, char **argv) 
{ 
    memcpy(board, initBoard, sizeof(board)); 
    int score = 0; 
    int depth = getDepth(board); 
    score = minimax(board, depth); 
    printf("\n--- finished ---\n\n"); 


    printf("Moves with the highest score: "); 
    int i; 
    for(i=0; i<9; i++){ 
     printf("%i | ", g_moves[i]); 
    } 
    printf("\n"); 

    printf("The score is %i\n", score); 

    printf("The best next board is: \n|----|----|----|\n"); 

    for(i=0; i<3; i++){ 
     printf("| %-2i ", board[i]); 
    } 
    printf("|\n|----|----|----|\n"); 
    for(i=3; i<6; i++){ 
     printf("| %-2i ", board[i]); 
    } 
    printf("|\n|----|----|----|\n"); 
    for(i=6; i<9; i++){ 
     printf("| %-2i ", board[i]); 
    } 
    printf("|\n|----|----|----|\n"); 

    return 0; 
}

、私はいくつかの変数を持っています私の勝利関数と同様に：

int isWin(int *board) 
{ 
    unsigned winningBoards[8][3] = { 
     {board[0], board[1], board[2],}, 
     {board[3], board[4], board[5],}, 
     {board[6], board[7], board[8],}, 
     {board[0], board[3], board[6],}, 
     {board[1], board[4], board[7],}, 
     {board[2], board[5], board[8],}, 
     {board[0], board[4], board[8],}, 
     {board[2], board[4], board[6],}, 
    }; 

    int i; 
    for(i=0; i<8; i++){ 
     if((winningBoards[i][0] != 0) && 
      (winningBoards[i][0] == winningBoards[i][1]) && 
      (winningBoards[i][0] == winningBoards[i][2])){ 
       winningPlayer = winningBoards[i][0]; 
       return winningPlayer; 
      } 
    } 
    return 0; 
}

depth 7から最後にdepth 1に戻ると、g_movesがすべて0に上書きされ、結果として次の行が出力されます（最後の70行のみ）：

... 
----- Called Minimax, Depth: 7 ----- 

Player: -1                                                                  
---                                                                    
Player 1 won on depth 7                                                               
Resulting score: (10-7)*1 = 3                                                             
Score returned to depth 6                                                              
---                                                                    
Value of the move at position 2 on next depth 7 is 3                                                       
---                                                                    
Value of position 2 was reset to 0 on next depth 7                                                        
---                                                                    
Move at position 0 was selected on next depth 7                                                         
Returning score of 3 to depth 6                                                             
---                                                                    
Value of the move at position 3 on next depth 6 is 3                                                       
---                                                                    
Value of position 3 was reset to 0 on next depth 6                                                        
---                                                                    
Move at position 0 was selected on next depth 6                                                         
Returning score of 3 to depth 5                                                             
---                                                                    
Value of the move at position 4 on next depth 5 is 3                                                       
---                                                                    
Value of position 4 was reset to 0 on next depth 5                                                        
---                                                                    
Move at position 0 was selected on next depth 5                                                         
Returning score of 3 to depth 4                                                             
---                                                                    
Value of the move at position 5 on next depth 4 is 3                                                       
---                                                                    
Value of position 5 was reset to 0 on next depth 4                                                        
---                                                                    
Move at position 0 was selected on next depth 4                                                         
Returning score of 3 to depth 3                                                             
---                                                                    
Value of the move at position 6 on next depth 3 is 3                                                       
---                                                                    
Value of position 6 was reset to 0 on next depth 3                                                        
---                                                                    
Move at position 0 was selected on next depth 3                                                         
Returning score of 5 to depth 2                                                             
---                                                                    
Value of the move at position 7 on next depth 2 is 5                                                       
---                                                                    
Value of position 7 was reset to 0 on next depth 2                                                        
---                                                                    
Move at position 0 was selected on next depth 2                                                         
Returning score of 5 to depth 1                                                             
---                                                                    
Value of the move at position 8 on next depth 1 is 5                                                       
---                                                                    
Value of position 8 was reset to 0 on next depth 1                                                        
---                                                                    
Move at position 0 was selected on next depth 1                                                         
Returning score of 5 to depth 0 
--- 

--- finished --- 

Moves with the highest score: 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
The score is 5 
The best next board is: 
|----|----|----| 
| 0 | 0 | 0 | 
|----|----|----| 
| 0 | 0 | 0 | 
|----|----|----| 
| 0 | 0 | 0 | 
|----|----|----|

私を助けるために他の情報が必要な場合は、私が自分自身を持っている場合、私はそれらをあなたに与えることができてうれしいです。

ありがとうございます。

EDIT：（：に従ってフォルダ内の./NAME_OF_FILE> DEST_NAME.txt CMD） それが今コンソールを使用して、.txtファイルにすべての可能なボードの状態を表示しますので、だから、私はminimax機能を書き直しました。コードは以下である：

int minimax(int *board, int depth) 
{ 
    g_node++; 
    int player; 
    int move = -1; 
    int score = -20; 
    int thisScore = -20; 
    int i; 

    if(isWin(board) != 0){ 
     printf("\nNode: %i\n", g_node); 
     printf("Board state:"); 
     for(i=0;i<9;i++) { 
      if((i%3) == 0) 
       printf("\n"); 
      printf("%2i ", board[i]); 
     } 
     printf("\n"); 
     printf("has a score of %i\n", (10-depth)*winningPlayer); 
     return (10-depth)*winningPlayer; 
    } 


    if(depth%2 ==1){ 
      player = -1; 
     } else { 
      player = 1; 
     } 
    for(i=0; i<9; i++){ 
     if(board[i] == 0){ 
      board[i] = player; 
      thisScore = minimax(board, depth+1); 
      if(thisScore > score){ 
       score = thisScore; 
       move = i; 
      } 
      board[i] = 0; 
     } 
    } 

    printf("\nNode: %i\n", g_node); 
    printf("Board state:"); 
    for(i=0;i<9;i++) { 
     if((i%3) == 0) 
      printf("\n"); 
     printf("%2i ", board[i]); 
    } 
    printf("\n"); 

    if(move == -1){ 
     printf("has a score of 0\n"); 
     return 0; 

    } 
    printf("has a score of %i\n", score); 
    return score; 
}

私の次のステップは、係る位置での各移動の最大scoreでボードを印刷することです。

Example: 
10 8 10 
8 7 8 
10 8 10 
for the empty board at the beginning.

EDIT 2：私は今、基本的に、しかし、それに問題がある、私は私の最後の編集に上記discribed何を行うことになっているprintScoredBoardsと呼ばれる別の機能を追加しました。相手が何気なくプレイし、minimaxがそれらを含むすべての可能性を試しているので、次のコードで私は空のボードに15点のスコアボードを手に入れます。

コーナーはもっと価値があり、センターは最も価値がありません。誰もがこのための回避策を知っているのですか？

EDIT：私は新しいミニマックスアルゴリズムを設定し、別のポストでそれを掲載しました。投稿は右の「リンク」セクション、またはhereにあります。私がやっていることは、マイクロコントローラのコードに実装して、すべての得点の中からベスト/セカンドベストを選ぶことができ、同じスコアで複数の動きがあればランダム化する機能を作り出すことです。この投稿はとなりました。です。

出典

2016-07-20 Lithimlin

tic-tac-toeのような非常に小さな状態空間では、最適なステップまたは別のランダムステップを選択するかどうかを決定するために、完璧なAIと「サイコロを回す」ことができます。さらに、媒体は、他のプレイヤーの勝利を妨げるようなチェックを行うことができます。 – lorro

それは実際にはあまりにも悪い考えではないし、このことが致命的であることが判明したら私はちょうどそれを行うだろうと思うが、私はまだ私が間違っている場所を知りたい。それはおそらく私がちょうど不足していることを明らかにしている – Lithimlin

私は2番目のlorroだろう、私は計算能力がminmaxを行うには十分であるか、少なくともalpha-beta-pruningでminmaxを行うのに十分であると推測する。どのようなマイクロコントローラですか？私はその計算能力の大まかなアイデアを得たいと思います。 – Codor

私は完全な深さの分析で二番目に良い動きを得ようとしていると思っています。あなたのminmaxの深さを制限することによって木全体を探索することはできません（AIはまだ強いですが、AIは依然として強くなります）、または本当に不完全なAIのためのランダムな動きを使用してください。

出典

2016-07-20 11:49:20

深さを正確に制限するのはどういう意味ですか？ – Lithimlin

ゲームツリー全体を探索するのではなく、2つだけ前に移動します。（int depth = getDepth（board）; int depth = 2;になる） –

だから、各動きにどのような基準をつけるべきですか？ – Lithimlin

Tic Tac ToeとMinimax - マイクロコントローラで不完全なAIを作成する

答えて

関連する問題