Machine learning for reversi.

Performance improvements for Reversi.getLegalMoves.
This commit is contained in:
Ticho Hidding
2025-12-02 10:59:33 +01:00
parent 1a11827ba3
commit 7e913ff50f
11 changed files with 464 additions and 24 deletions

View File

@@ -99,6 +99,16 @@
<artifactId>error_prone_annotations</artifactId>
<version>2.42.0</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native-platform</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
</dependencies>

View File

@@ -15,6 +15,7 @@ public final class Reversi extends TurnBasedGame {
private int movesTaken;
private Set<Point> filledCells = new HashSet<>();
private Move[] mostRecentlyFlippedPieces;
private char[][] cachedBoard;
/** Immutable pair holding the score of player 1 and the score of player 2. */
public record Score(int player1Score, int player2Score) {}
@@ -37,6 +38,7 @@ public final class Reversi extends TurnBasedGame {
this.setBoard(new Move(35, 'B'));
this.setBoard(new Move(36, 'W'));
updateFilledCellsSet();
cachedBoard = makeBoardAGrid();
}
private void updateFilledCellsSet() {
for (int i = 0; i < 64; i++) {
@@ -49,11 +51,13 @@ public final class Reversi extends TurnBasedGame {
@Override
public Move[] getLegalMoves() {
final ArrayList<Move> legalMoves = new ArrayList<>();
char[][] boardGrid = makeBoardAGrid();
char[][] boardGrid = cachedBoard;
char currentPlayer = (this.getCurrentTurn()==0) ? 'B' : 'W';
Set<Point> adjCell = getAdjacentCells(boardGrid);
char opponent = (currentPlayer=='W') ? 'B' : 'W';
Set<Point> adjCell = getAdjacentCells(boardGrid, opponent);
for (Point point : adjCell){
Move[] moves = getFlipsForPotentialMove(point,currentPlayer);
Move[] moves = getFlipsForPotentialMove(point, currentPlayer, opponent, boardGrid);
int score = moves.length;
if (score > 0){
legalMoves.add(new Move(point.x + point.y * this.getRowSize(), currentPlayer));
@@ -62,18 +66,20 @@ public final class Reversi extends TurnBasedGame {
return legalMoves.toArray(new Move[0]);
}
private Set<Point> getAdjacentCells(char[][] boardGrid) {
private Set<Point> getAdjacentCells(char[][] boardGrid, char opponent) {
Set<Point> possibleCells = new HashSet<>();
for (Point point : filledCells) { //for every filled cell
for (int deltaColumn = -1; deltaColumn <= 1; deltaColumn++){ //check adjacent cells
for (int deltaRow = -1; deltaRow <= 1; deltaRow++){ //orthogonally and diagonally
int newX = point.x + deltaColumn, newY = point.y + deltaRow;
if (deltaColumn == 0 && deltaRow == 0 //continue if out of bounds
|| !isOnBoard(newX, newY)) {
continue;
}
if (boardGrid[newY][newX] == EMPTY) { //check if the cell is empty
possibleCells.add(new Point(newX, newY)); //and then add it to the set of possible moves
if (boardGrid[point.x][point.y] == opponent) {
for (int deltaColumn = -1; deltaColumn <= 1; deltaColumn++) { //check adjacent cells
for (int deltaRow = -1; deltaRow <= 1; deltaRow++) { //orthogonally and diagonally
int newX = point.x + deltaColumn, newY = point.y + deltaRow;
if (deltaColumn == 0 && deltaRow == 0 //continue if out of bounds
|| !isOnBoard(newX, newY)) {
continue;
}
if (boardGrid[newY][newX] == EMPTY) { //check if the cell is empty
possibleCells.add(new Point(newX, newY)); //and then add it to the set of possible moves
}
}
}
}
@@ -81,14 +87,14 @@ public final class Reversi extends TurnBasedGame {
return possibleCells;
}
public Move[] getFlipsForPotentialMove(Point point, char currentPlayer) {
public Move[] getFlipsForPotentialMove(Point point, char currentPlayer, char opponent, char[][] boardGrid) {
final ArrayList<Move> movesToFlip = new ArrayList<>();
for (int deltaColumn = -1; deltaColumn <= 1; deltaColumn++) { //for all directions
for (int deltaRow = -1; deltaRow <= 1; deltaRow++) {
if (deltaColumn == 0 && deltaRow == 0){
continue;
}
Move[] moves = getFlipsInDirection(point,makeBoardAGrid(),currentPlayer,deltaColumn,deltaRow);
Move[] moves = getFlipsInDirection(point, boardGrid, currentPlayer, opponent, deltaColumn, deltaRow);
if (moves != null) { //getFlipsInDirection
movesToFlip.addAll(Arrays.asList(moves));
}
@@ -97,8 +103,14 @@ public final class Reversi extends TurnBasedGame {
return movesToFlip.toArray(new Move[0]);
}
private Move[] getFlipsInDirection(Point point, char[][] boardGrid, char currentPlayer, int dirX, int dirY) {
char opponent = getOpponent(currentPlayer);
/**
 * Computes the pieces that would be flipped if the player currently to move played at the
 * position of {@code move}. The colour stored in {@code move} is not consulted; the current
 * player and their opponent are used instead.
 *
 * @param move move whose board position is evaluated
 * @return the pieces that would be flipped; an empty array when nothing would be captured
 */
public Move[] getFlipsForPotentialMove(Move move) {
    if (cachedBoard == null) { //same lazy initialisation as play(): the cache may not have been built yet
        cachedBoard = makeBoardAGrid();
    }
    char curr = getCurrentPlayer();
    char opp = getOpponent(curr);
    Point point = new Point(move.position() % this.getRowSize(), move.position() / this.getColumnSize());
    return getFlipsForPotentialMove(point, curr, opp, cachedBoard);
}
private Move[] getFlipsInDirection(Point point, char[][] boardGrid, char currentPlayer, char opponent, int dirX, int dirY) {
final ArrayList<Move> movesToFlip = new ArrayList<>();
int x = point.x + dirX;
int y = point.y + dirY;
@@ -123,7 +135,7 @@ public final class Reversi extends TurnBasedGame {
return x >= 0 && x < this.getColumnSize() && y >= 0 && y < this.getRowSize();
}
private char[][] makeBoardAGrid() {
public char[][] makeBoardAGrid() {
char[][] boardGrid = new char[this.getRowSize()][this.getColumnSize()];
for (int i = 0; i < 64; i++) {
boardGrid[i / this.getRowSize()][i % this.getColumnSize()] = this.getBoard()[i]; //boardGrid[y -> row] [x -> column]
@@ -133,6 +145,9 @@ public final class Reversi extends TurnBasedGame {
@Override
public GameState play(Move move) {
if (cachedBoard == null) {
cachedBoard = makeBoardAGrid();
}
Move[] legalMoves = getLegalMoves();
boolean moveIsLegal = false;
for (Move legalMove : legalMoves) { //check if the move is legal
@@ -145,13 +160,14 @@ public final class Reversi extends TurnBasedGame {
return null;
}
Move[] moves = sortMovesFromCenter(getFlipsForPotentialMove(new Point(move.position()%this.getColumnSize(),move.position()/this.getRowSize()), move.value()),move);
Move[] moves = sortMovesFromCenter(getFlipsForPotentialMove(new Point(move.position()%this.getColumnSize(),move.position()/this.getRowSize()), move.value(),move.value() == 'B'? 'W': 'B',makeBoardAGrid()),move);
mostRecentlyFlippedPieces = moves;
this.setBoard(move); //place the move on the board
for (Move m : moves) {
this.setBoard(m); //flip the correct pieces on the board
}
filledCells.add(new Point(move.position() % this.getRowSize(), move.position() / this.getColumnSize()));
cachedBoard = makeBoardAGrid();
nextTurn();
if (getLegalMoves().length == 0) { //skip the players turn when there are no legal moves
skipMyTurn();
@@ -172,7 +188,7 @@ public final class Reversi extends TurnBasedGame {
}
// Passes over the side to move by advancing the turn; play() calls this when the
// side to move has no legal moves after a move has been played.
private void skipMyTurn(){
IO.println("TURN " + getCurrentPlayer() + " SKIPPED");
//IO.println("TURN " + getCurrentPlayer() + " SKIPPED");
//TODO: notify user that a turn has been skipped
nextTurn();
}
@@ -207,6 +223,32 @@ public final class Reversi extends TurnBasedGame {
}
return new Score(player1Score, player2Score);
}
/**
 * Reports whether the game has ended, i.e. neither side has a legal move left.
 * <p>
 * The turn is advanced once to count the other side's moves and then advanced a second
 * time; presumably this restores the original side to move in a two-player game —
 * NOTE(review): confirm that nextTurn() has no other lasting effect.
 *
 * @return {@code true} when both sides have zero legal moves
 */
public boolean isGameOver(){
    final int movesForSideToMove = getLegalMoves().length;
    nextTurn();
    final int movesForOtherSide = getLegalMoves().length;
    nextTurn();
    return movesForSideToMove + movesForOtherSide == 0;
}
/**
 * Determines the winner of a finished game from the current score.
 *
 * @return {@code 1} when player 1 has the higher score, {@code 2} when player 2 has the
 *         higher score, and {@code 0} when the scores are equal or the game is not over yet
 */
public int getWinner(){
    if (isGameOver()) {
        final Score finalScore = getScore();
        final int first = finalScore.player1Score();
        final int second = finalScore.player2Score();
        if (first != second) {
            return first > second ? 1 : 2;
        }
    }
    return 0; //draw, or game still in progress
}
private Move[] sortMovesFromCenter(Move[] moves, Move center) { //sorts the pieces to be flipped for animation purposes
int centerX = center.position()%this.getColumnSize();
int centerY = center.position()/this.getRowSize();
@@ -226,4 +268,34 @@ public final class Reversi extends TurnBasedGame {
/**
 * Returns the pieces flipped by the most recent call to play(), in the order produced by
 * sortMovesFromCenter. The stored array itself is returned, not a copy.
 * NOTE(review): the field has no initializer at its declaration, so this presumably
 * returns null before the first move is played — confirm.
 */
public Move[] getMostRecentlyFlippedPieces() {
return mostRecentlyFlippedPieces;
}
/**
 * Encodes the board as integers, one entry per cell in board order.
 *
 * @return a new array where {@code 'W'} cells are {@code -1}, {@code 'B'} cells are
 *         {@code 1} and every other cell (including blanks) is {@code 0}
 */
public int[] getBoardInt(){
    final char[] cells = getBoard();
    final int[] encoded = new int[cells.length];
    int index = 0;
    for (char cell : cells) {
        encoded[index++] = switch (cell) {
            case 'W' -> -1;
            case 'B' -> 1;
            default -> 0; //blank or any unexpected character
        };
    }
    return encoded;
}
/**
 * Converts the linear board position of a move into grid coordinates.
 *
 * @param move move whose position is converted
 * @return a point whose {@code x} is the position modulo the column size and whose
 *         {@code y} is the position divided by the row size
 */
public Point moveToPoint(Move move){
    final int position = move.position();
    final int column = position % this.getColumnSize();
    final int row = position / this.getRowSize();
    return new Point(column, row);
}
/**
 * Prints the cached board grid, one row per line, using {@code Arrays.toString}.
 * Reads the cached grid rather than rebuilding it from the board.
 */
public void printBoard(){
    final int rowCount = this.getRowSize();
    int row = 0;
    while (row < rowCount) {
        final String renderedRow = Arrays.toString(cachedBoard[row]);
        IO.println(renderedRow);
        row++;
    }
}
}

View File

@@ -4,6 +4,7 @@ import org.toop.game.AI;
import org.toop.game.records.Move;
public final class ReversiAI extends AI<Reversi> {
@Override
public Move findBestMove(Reversi game, int depth) {
Move[] moves = game.getLegalMoves();

View File

@@ -0,0 +1,52 @@
package org.toop.game.reversi;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.toop.game.AI;
import org.toop.game.records.Move;
import java.io.IOException;
import java.io.InputStream;
public class ReversiAIML extends AI<Reversi>{
MultiLayerNetwork model;
public ReversiAIML() {
InputStream is = getClass().getResourceAsStream("/reversi-model.zip");
try {
assert is != null;
model = ModelSerializer.restoreMultiLayerNetwork(is);
} catch (IOException e) {}
}
public Move findBestMove(Reversi reversi, int depth){
int[] input = reversi.getBoardInt();
INDArray boardInput = Nd4j.create(new int[][] { input });
INDArray prediction = model.output(boardInput);
int move = pickLegalMove(prediction, reversi);
return new Move(move, reversi.getCurrentPlayer());
}
private int pickLegalMove(INDArray prediction, Reversi reversi){
double[] probs = prediction.toDoubleVector();
Move[] legalMoves = reversi.getLegalMoves();
if (legalMoves.length == 0) return -1;
int bestMove = legalMoves[0].position();
double bestVal = probs[bestMove];
for (Move move : legalMoves){
if (probs[move.position()] > bestVal){
bestMove = move.position();
bestVal = probs[bestMove];
}
}
return bestMove;
}
}

View File

@@ -0,0 +1,57 @@
package org.toop.game.reversi;
import org.toop.game.AI;
import org.toop.game.records.Move;
import java.util.Arrays;
/**
 * One-ply heuristic Reversi AI.
 * <p>
 * For every legal move it measures two things: how many pieces the move flips, and how
 * many legal moves exist in the position after the move has been played. It then plays
 * the best move of whichever measure reached the higher value.
 */
public class ReversiAISimple extends AI<Reversi> {

    /**
     * Chooses a move for the side to move.
     *
     * @param game  game to choose a move for; it is not modified
     * @param depth unused by this AI
     * @return the selected legal move
     * @throws IllegalStateException if the side to move has no legal moves
     */
    @Override
    public Move findBestMove(Reversi game, int depth) {
        Move[] moves = game.getLegalMoves();
        if (moves.length == 0) {
            // fail with a clear message instead of an ArrayIndexOutOfBoundsException on moves[0]
            throw new IllegalStateException("findBestMove called while the side to move has no legal moves");
        }

        Move bestMoveScore = moves[0];
        Move bestMoveOptions = moves[0];
        int bestScore = -1;
        int bestOptions = -1;

        for (Move move : moves){
            int numOpt = getNumberOfOptions(game, move);
            if (numOpt > bestOptions) {
                bestOptions = numOpt;
                bestMoveOptions = move;
            }
            int numSco = getScore(game, move);
            if (numSco > bestScore) {
                bestScore = numSco;
                bestMoveScore = move;
            }
        }

        // Prefer the biggest capture only when it beats the best option count outright;
        // ties go to the option-count move.
        return (bestScore > bestOptions) ? bestMoveScore : bestMoveOptions;
    }

    /**
     * Counts the legal moves available in the position reached by playing {@code move} on
     * a copy of {@code game}.
     * NOTE(review): after play() the side to move is normally the opponent, so this looks
     * like it rewards moves that give the opponent more options — confirm that is intended.
     */
    private int getNumberOfOptions(Reversi game, Move move){
        Reversi copy = new Reversi(game);
        copy.play(move);
        return copy.getLegalMoves().length;
    }

    /** Returns how many pieces {@code move} would flip in the current position. */
    private int getScore(Reversi game, Move move){
        return game.getFlipsForPotentialMove(move).length;
    }
}

View File

@@ -8,17 +8,24 @@ import org.toop.game.enumerators.GameState;
import org.toop.game.records.Move;
import org.toop.game.reversi.Reversi;
import org.toop.game.reversi.ReversiAI;
import org.toop.game.reversi.ReversiAIML;
import org.toop.game.reversi.ReversiAISimple;
import static org.junit.jupiter.api.Assertions.*;
class ReversiTest {
private Reversi game;
private ReversiAI ai;
private ReversiAIML aiml;
private ReversiAISimple aiSimple;
// Runs before every test: starts from a fresh game and fresh instances of all three AIs.
@BeforeEach
void setup() {
game = new Reversi();
ai = new ReversiAI();
aiml = new ReversiAIML();
aiSimple = new ReversiAISimple();
}
@@ -190,4 +197,35 @@ class ReversiTest {
}
return false;
}
/**
 * Plays a series of complete games and prints how often each side won.
 * The method makes no assertions; it only reports the tallies.
 */
@Test
void testAIvsAIML(){
    IO.println("Testing AI simple ...");
    final int totalGames = 5000;
    int p1wins = 0;
    int p2wins = 0;
    int draws = 0;

    for (int played = 0; played < totalGames; played++) {
        game = new Reversi();
        while (!game.isGameOver()) {
            // NOTE(review): both sides are driven by the same `ai` instance, exactly as in
            // the original if/else; `aiml` and `aiSimple` are never used here — confirm
            // which opponent this test was meant to exercise.
            switch (game.getCurrentPlayer()) {
                case 'W' -> game.play(ai.findBestMove(game, 5));
                default -> game.play(ai.findBestMove(game, 5));
            }
        }
        switch (game.getWinner()) {
            case 1 -> p1wins++;
            case 2 -> p2wins++;
            default -> draws++; //getWinner() reports 0 for a draw
        }
    }

    final double p1Rate = (double) p1wins / totalGames;
    IO.println("p1 winrate: " + p1wins + "/" + totalGames + " = " + p1Rate + "\np2wins: " + p2wins + " draws: " + draws);
}
}