merge commit

This commit is contained in:
Ticho Hidding
2025-12-08 14:55:32 +01:00
parent ca7f9e8ecf
commit 03dc6130e2
12 changed files with 120 additions and 753 deletions

View File

@@ -1,18 +1,17 @@
package org.toop.game.reversi;
package org.toop.game.games.reversi;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.toop.game.AI;
import org.toop.game.records.Move;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import java.io.IOException;
import java.io.InputStream;
import static java.lang.Math.random;
public class ReversiAIML extends AI<Reversi>{
public class ReversiAIML extends AbstractAI<ReversiR> {
MultiLayerNetwork model;
@@ -24,35 +23,35 @@ public class ReversiAIML extends AI<Reversi>{
} catch (IOException e) {}
}
/**
 * Asks the network for a move on the given position.
 *
 * <p>The board is encoded with {@code getBoardInt()}, fed to the model as a single-row
 * batch, and the prediction is narrowed to a legal square by {@code pickLegalMove}.
 *
 * @param reversi game to choose a move for
 * @param depth   not used by this implementation
 * @return a move for the current player at the position {@code pickLegalMove} selected
 */
public Move findBestMove(Reversi reversi, int depth){
    final INDArray boardInput = Nd4j.create(new int[][] { reversi.getBoardInt() });
    final int chosenPosition = pickLegalMove(model.output(boardInput), reversi);
    return new Move(chosenPosition, reversi.getCurrentPlayer());
}
private int pickLegalMove(INDArray prediction, Reversi reversi) {
private int pickLegalMove(INDArray prediction, ReversiR reversi) {
double[] logits = prediction.toDoubleVector();
Move[] legalMoves = reversi.getLegalMoves();
int[] legalMoves = reversi.getLegalMoves();
if (legalMoves.length == 0) return -1;
int bestMove = legalMoves[0].position();
int bestMove = legalMoves[0];
double bestVal = logits[bestMove];
if (random() < 0.01){
return legalMoves[(int)(random()*legalMoves.length-.5)].position();
return legalMoves[(int)(random()*legalMoves.length-.5)];
}
for (Move move : legalMoves) {
int pos = move.position();
if (logits[pos] > bestVal) {
bestMove = pos;
bestVal = logits[pos];
for (int move : legalMoves) {
if (logits[move] > bestVal) {
bestMove = move;
bestVal = logits[move];
}
}
return bestMove;
}
/**
 * Chooses a move by running the board through the network.
 *
 * @param game game to choose a move for
 * @return the board index selected by {@code pickLegalMove} from the model's prediction
 */
@Override
public int getMove(ReversiR game) {
    // The model takes a batch; wrap the single board in a one-row matrix.
    final INDArray boardInput = Nd4j.create(new int[][] { game.getBoard() });
    return pickLegalMove(model.output(boardInput), game);
}
}

View File

@@ -1,28 +1,33 @@
package org.toop.game.reversi;
package org.toop.game.games.reversi;
import org.toop.game.AI;
import org.toop.game.records.Move;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import java.util.Arrays;
import java.awt.*;
public class ReversiAISimple extends AI<Reversi> {
public class ReversiAISimple extends AbstractAI<ReversiR> {
/**
 * Counts the legal moves available after {@code move} has been played.
 *
 * <p>The move is played on a deep copy, so {@code game} itself is not advanced.
 *
 * @param game position to evaluate from
 * @param move board index to try
 * @return number of legal moves reported by the copy after the move
 */
private int getNumberOfOptions(ReversiR game, int move){
    final ReversiR lookahead = game.deepCopy();
    lookahead.play(move);
    final int[] followUps = lookahead.getLegalMoves();
    return followUps.length;
}
/**
 * Counts how many pieces {@code move} would flip for the player whose turn it is.
 *
 * @param game position to evaluate
 * @param move board index to try
 * @return length of the flip list returned by {@code getFlipsForPotentialMove}
 */
private int getScore(ReversiR game, int move){
    // Convert the flat index into the (x, y) point the game API expects.
    final int x = move % game.getColumnSize();
    final int y = move / game.getRowSize();
    final Point target = new Point(x, y);
    return game.getFlipsForPotentialMove(target, game.getCurrentTurn()).length;
}
@Override
public Move findBestMove(Reversi game, int depth) {
//IO.println("****START FIND BEST MOVE****");
public int getMove(ReversiR game) {
Move[] moves = game.getLegalMoves();
int[] moves = game.getLegalMoves();
//game.printBoard();
//IO.println("Legal moves: " + Arrays.toString(moves));
Move bestMove;
Move bestMoveScore = moves[0];
Move bestMoveOptions = moves[0];
int bestMove;
int bestMoveScore = moves[0];
int bestMoveOptions = moves[0];
int bestScore = -1;
int bestOptions = -1;
for (Move move : moves){
for (int move : moves){
int numOpt = getNumberOfOptions(game, move);
if (numOpt > bestOptions) {
bestOptions = numOpt;
@@ -50,14 +55,4 @@ public class ReversiAISimple extends AI<Reversi> {
}
return bestMove;
}
/**
 * Counts the legal moves available after {@code move} has been played.
 *
 * <p>The move is played on a copy made with the copy constructor.
 *
 * @param game position to evaluate from
 * @param move move to try
 * @return number of legal moves reported by the copy after the move
 */
private int getNumberOfOptions(Reversi game, Move move){
    final Reversi lookahead = new Reversi(game);
    lookahead.play(move);
    final Move[] followUps = lookahead.getLegalMoves();
    return followUps.length;
}
/**
 * Counts how many pieces {@code move} would flip.
 *
 * @param game position to evaluate
 * @param move move to try
 * @return length of the flip list returned by {@code getFlipsForPotentialMove}
 */
private int getScore(Reversi game, Move move){
    final Move[] flips = game.getFlipsForPotentialMove(move);
    return flips.length;
}
}

View File

@@ -254,7 +254,24 @@ public final class ReversiR extends AbstractGame<ReversiR> {
});
return Arrays.stream(moves).mapToInt(Integer::intValue).toArray();
}
/**
 * Returns the stored {@code mostRecentlyFlippedPieces} array.
 *
 * <p>The field itself is returned, not a copy.
 *
 * @return the current value of {@code mostRecentlyFlippedPieces}
 */
public int[] getMostRecentlyFlippedPieces() {
    return this.mostRecentlyFlippedPieces;
}
/**
 * Tallies the pieces on the board for both players.
 *
 * <p>Cells holding {@code 1} count for player one and cells holding {@code 2} count for
 * player two; any other value is ignored.
 *
 * @return the two piece counts
 */
public Score getScore() {
    final int[] cells = getBoard();
    int firstPlayerPieces = 0;
    int secondPlayerPieces = 0;
    for (int idx = 0; idx < this.getColumnSize() * this.getRowSize(); idx++) {
        switch (cells[idx]) {
            case 1 -> firstPlayerPieces++;
            case 2 -> secondPlayerPieces++;
            default -> { /* empty or unknown cell: not counted */ }
        }
    }
    return new Score(firstPlayerPieces, secondPlayerPieces);
}
}

View File

@@ -0,0 +1,198 @@
package org.toop.game.machinelearning;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.toop.framework.gameFramework.GameState;
import org.toop.framework.gameFramework.model.game.PlayResult;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import org.toop.framework.gameFramework.model.player.Player;
import org.toop.game.games.reversi.ReversiAIR;
import org.toop.game.games.reversi.ReversiR;
import org.toop.game.games.reversi.ReversiAIML;
import org.toop.game.games.reversi.ReversiAISimple;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static java.lang.Math.abs;
import static java.lang.Math.random;
public class NeuralNetwork {
private MultiLayerConfiguration conf;
private MultiLayerNetwork model;
private AbstractAI<ReversiR> opponentAI;
private AbstractAI<ReversiR> opponentRand = new ReversiAIR();
private AbstractAI<ReversiR> opponentSimple = new ReversiAISimple();
private AbstractAI<ReversiR> opponentAIML = new ReversiAIML();
public NeuralNetwork() {}
public void init(){
conf = new NeuralNetConfiguration.Builder()
.updater(new Adam(0.001))
.weightInit(WeightInit.XAVIER) //todo understand
.list()
.layer(new DenseLayer.Builder()
.nIn(64)
.nOut(128)
.activation(Activation.RELU)
.build())
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.nIn(128)
.nOut(64)
.activation(Activation.SOFTMAX)
.build())
.build();
model = new MultiLayerNetwork(conf);
IO.println(model.params());
loadModel();
IO.println(model.params());
model.init();
IO.println(model.summary());
model.setLearningRate(0.0003);
trainingLoop();
saveModel();
}
public void saveModel(){
File modelFile = new File("reversi-model.zip");
try {
ModelSerializer.writeModel(model, modelFile, true);
}catch (Exception e){
e.printStackTrace();
}
}
public void loadModel(){
File modelFile = new File("reversi-model.zip");
try {
model = ModelSerializer.restoreMultiLayerNetwork(modelFile);
} catch (IOException e) {
e.printStackTrace();
}
}
public void trainingLoop(){
int totalGames = 5000;
double epsilon = 0.05;
long start = System.nanoTime();
for (int game = 0; game<totalGames; game++){
char modelPlayer = random()<0.5?'B':'W';
ReversiR reversi = new ReversiR(new Player[2]);
opponentAI = getOpponentAI();
List<StateAction> gameHistory = new ArrayList<>();
PlayResult state = new PlayResult(GameState.NORMAL,reversi.getCurrentTurn());
double reward = 0;
while (state.state() != GameState.DRAW && state.state() != GameState.WIN){
int curr = reversi.getCurrentTurn();
int move;
if (curr == modelPlayer) {
int[] input = reversi.getBoard();
if (Math.random() < epsilon) {
int[] moves = reversi.getLegalMoves();
move = moves[(int) (Math.random() * moves.length - .5f)];
} else {
INDArray boardInput = Nd4j.create(new int[][]{input});
INDArray prediction = model.output(boardInput);
int location = pickLegalMove(prediction, reversi);
gameHistory.add(new StateAction(input, location));
move = location;
}
}else{
move = opponentAI.getMove(reversi);
}
state = reversi.play(move);
}
//IO.println(model.params());
ReversiR.Score score = reversi.getScore();
int scoreDif = abs(score.player1Score() - score.player2Score());
if (score.player1Score() > score.player2Score()){
reward = 1 + ((scoreDif / 64.0) * 0.5);
}else if (score.player1Score() < score.player2Score()){
reward = -1 - ((scoreDif / 64.0) * 0.5);
}else{
reward = 0;
}
if (modelPlayer == 'W'){
reward = -reward;
}
for (StateAction step : gameHistory){
trainFromHistory(step, reward);
}
//IO.println("Wr: " + (double)p1wins/(game+1) + " draws: " + draws);
if(game % 100 == 0){
IO.println("Completed game " + game + " | Reward: " + reward);
//IO.println(Arrays.toString(reversi.getBoardDouble()));
}
}
long end = System.nanoTime();
IO.println((end-start));
}
private int pickLegalMove(INDArray prediction, ReversiR reversi){
double[] probs = prediction.toDoubleVector();
int[] legalMoves = reversi.getLegalMoves();
if (legalMoves.length == 0) return -1;
int bestMove = legalMoves[0];
double bestVal = probs[bestMove];
for (int move : legalMoves){
if (probs[move] > bestVal){
bestMove = move;
bestVal = probs[bestMove];
}
}
return bestMove;
}
private AbstractAI<ReversiR> getOpponentAI(){
return switch ((int) (Math.random() * 4)) {
case 0 -> opponentRand;
case 1 -> opponentSimple;
case 2 -> opponentAIML;
default -> opponentRand;
};
}
private void trainFromHistory(StateAction step, double reward){
double[] output = new double[64];
output[step.action] = reward;
DataSet ds = new DataSet(
Nd4j.create(new int[][] { step.state }),
Nd4j.create(new double[][] { output })
);
model.fit(ds);
}
}

View File

@@ -0,0 +1,10 @@
package org.toop.game.machinelearning;
/**
 * One recorded training step: the board the model saw and the action it chose there.
 */
public class StateAction {
    /** Snapshot of the board at the time of the decision; {@code null} if none was given. */
    final int[] state;
    /** Board index of the chosen action. */
    final int action;

    /**
     * @param state  board contents at decision time; copied, so later changes to the
     *               caller's array do not alter this record
     * @param action board index of the chosen action
     */
    public StateAction(int[] state, int action) {
        // Defensive copy: callers typically pass the game's own board array, which keeps
        // changing as the game goes on.
        this.state = (state == null) ? null : state.clone();
        this.action = action;
    }
}

View File

@@ -1,301 +0,0 @@
package org.toop.game.reversi;
import org.toop.game.TurnBasedGame;
import org.toop.game.enumerators.GameState;
import org.toop.game.records.Move;
import java.awt.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/**
 * Reversi on a 64-cell board with the pieces {@code 'B'} and {@code 'W'}.
 *
 * <p>The board is stored flat by the superclass; this class additionally keeps a
 * {@code [row][column]} grid cache and the set of occupied cells to speed up move
 * generation. Turn 0 plays {@code 'B'}, any other turn plays {@code 'W'}.
 */
public final class Reversi extends TurnBasedGame {
    private int movesTaken;
    /** Occupied cells as points with x = column and y = row. */
    private Set<Point> filledCells = new HashSet<>();
    private Move[] mostRecentlyFlippedPieces;
    /** Board as a grid indexed {@code [row][column]}; rebuilt after every successful play. */
    private char[][] cachedBoard;

    /** Piece counts: player 1 is {@code 'B'}, player 2 is {@code 'W'}. */
    public record Score(int player1Score, int player2Score) {}

    /** Creates a new game with the four starting pieces placed. */
    public Reversi() {
        super(8, 8, 2);
        addStartPieces();
    }

    /**
     * Creates an independent copy of {@code other}.
     *
     * <p>The occupied-cell set and the flipped-pieces array are copied rather than
     * shared, so playing on the copy cannot disturb the original, and the grid cache is
     * built immediately so {@link #getLegalMoves()} works on a fresh copy.
     */
    public Reversi(Reversi other) {
        super(other);
        this.movesTaken = other.movesTaken;
        this.filledCells = new HashSet<>(other.filledCells);
        this.mostRecentlyFlippedPieces = other.mostRecentlyFlippedPieces == null
                ? null
                : other.mostRecentlyFlippedPieces.clone();
        this.cachedBoard = makeBoardAGrid();
    }

    /** Places the four centre pieces and initialises the derived caches. */
    private void addStartPieces() {
        this.setBoard(new Move(27, 'W'));
        this.setBoard(new Move(28, 'B'));
        this.setBoard(new Move(35, 'B'));
        this.setBoard(new Move(36, 'W'));
        updateFilledCellsSet();
        cachedBoard = makeBoardAGrid();
    }

    /** Adds every cell currently holding a piece to {@code filledCells}. */
    private void updateFilledCellsSet() {
        for (int i = 0; i < 64; i++) {
            if (this.getBoard()[i] == 'W' || this.getBoard()[i] == 'B') {
                filledCells.add(new Point(i % this.getColumnSize(), i / this.getRowSize()));
            }
        }
    }

    /**
     * Returns every move the current player may make: empty cells next to an opponent
     * piece that would flip at least one piece.
     */
    @Override
    public Move[] getLegalMoves() {
        final ArrayList<Move> legalMoves = new ArrayList<>();
        char[][] boardGrid = cachedBoard;
        char currentPlayer = (this.getCurrentTurn()==0) ? 'B' : 'W';
        char opponent = (currentPlayer=='W') ? 'B' : 'W';

        Set<Point> adjCell = getAdjacentCells(boardGrid, opponent);
        for (Point point : adjCell){
            Move[] moves = getFlipsForPotentialMove(point, currentPlayer, opponent, boardGrid);
            int score = moves.length;
            if (score > 0){
                legalMoves.add(new Move(point.x + point.y * this.getRowSize(), currentPlayer));
            }
        }
        return legalMoves.toArray(new Move[0]);
    }

    /** Collects the empty cells that touch (orthogonally or diagonally) an opponent piece. */
    private Set<Point> getAdjacentCells(char[][] boardGrid, char opponent) {
        Set<Point> possibleCells = new HashSet<>();
        for (Point point : filledCells) {                                                       //for every filled cell
            // The grid is [row][column], i.e. [y][x]; indexing it as [x][y] looked at the
            // transposed cell and could miss candidates.
            if (boardGrid[point.y][point.x] == opponent) {
                for (int deltaColumn = -1; deltaColumn <= 1; deltaColumn++) {                   //check adjacent cells
                    for (int deltaRow = -1; deltaRow <= 1; deltaRow++) {                        //orthogonally and diagonally
                        int newX = point.x + deltaColumn, newY = point.y + deltaRow;
                        if (deltaColumn == 0 && deltaRow == 0                                   //skip the cell itself
                                || !isOnBoard(newX, newY)) {                                    //and anything off the board
                            continue;
                        }
                        if (boardGrid[newY][newX] == EMPTY) {                                   //check if the cell is empty
                            possibleCells.add(new Point(newX, newY));                           //and then add it to the set of possible moves
                        }
                    }
                }
            }
        }
        return possibleCells;
    }

    /**
     * Returns the pieces that placing {@code currentPlayer} at {@code point} would flip,
     * gathered over all eight directions. Empty when nothing would flip.
     */
    public Move[] getFlipsForPotentialMove(Point point, char currentPlayer, char opponent, char[][] boardGrid) {
        final ArrayList<Move> movesToFlip = new ArrayList<>();
        for (int deltaColumn = -1; deltaColumn <= 1; deltaColumn++) {                           //for all directions
            for (int deltaRow = -1; deltaRow <= 1; deltaRow++) {
                if (deltaColumn == 0 && deltaRow == 0){
                    continue;
                }
                Move[] moves = getFlipsInDirection(point, boardGrid, currentPlayer, opponent, deltaColumn, deltaRow);
                if (moves != null) {                                                            //null means nothing flips in this direction
                    movesToFlip.addAll(Arrays.asList(moves));
                }
            }
        }
        return movesToFlip.toArray(new Move[0]);
    }

    /** Same as the four-argument overload, for the current player on the cached board. */
    public Move[] getFlipsForPotentialMove(Move move) {
        char curr = getCurrentPlayer();
        char opp = getOpponent(curr);
        Point point = new Point(move.position() % this.getRowSize(), move.position() / this.getColumnSize());
        return getFlipsForPotentialMove(point, curr, opp, cachedBoard);
    }

    /**
     * Walks from {@code point} in one direction and returns the run of opponent pieces
     * that is closed off by one of the current player's pieces, or {@code null} when
     * there is no such run.
     */
    private Move[] getFlipsInDirection(Point point, char[][] boardGrid, char currentPlayer, char opponent, int dirX, int dirY) {
        final ArrayList<Move> movesToFlip = new ArrayList<>();
        int x = point.x + dirX;
        int y = point.y + dirY;

        if (!isOnBoard(x, y) || boardGrid[y][x] != opponent) {                                  //there must first be an opponents tile
            return null;
        }

        while (isOnBoard(x, y) && boardGrid[y][x] == opponent) {                                //collect the opponents tiles in this direction
            movesToFlip.add(new Move(x+y*this.getRowSize(), currentPlayer));
            x += dirX;
            y += dirY;
        }
        if (isOnBoard(x, y) && boardGrid[y][x] == currentPlayer) {
            return movesToFlip.toArray(new Move[0]);                                            //only return the run if last tile is ours
        }
        return null;
    }

    private boolean isOnBoard(int x, int y) {
        return x >= 0 && x < this.getColumnSize() && y >= 0 && y < this.getRowSize();
    }

    /** Builds a fresh {@code [row][column]} grid from the flat board. */
    public char[][] makeBoardAGrid() {
        char[][] boardGrid = new char[this.getRowSize()][this.getColumnSize()];
        for (int i = 0; i < 64; i++) {
            boardGrid[i / this.getRowSize()][i % this.getColumnSize()] = this.getBoard()[i];    //boardGrid[y -> row] [x -> column]
        }
        return boardGrid;
    }

    /**
     * Plays {@code move} if it is legal: places the piece, flips the captured pieces and
     * advances the turn, skipping a player who has no legal move.
     *
     * @return {@code null} when the move is illegal; {@code TURN_SKIPPED} when the next
     *         player had to pass; {@code DRAW} or {@code WIN} when neither player can
     *         move; {@code NORMAL} otherwise
     */
    @Override
    public GameState play(Move move) {
        if (cachedBoard == null) {
            cachedBoard = makeBoardAGrid();
        }
        Move[] legalMoves = getLegalMoves();
        boolean moveIsLegal = false;
        for (Move legalMove : legalMoves) {                                                     //check if the move is legal
            if (move.equals(legalMove)) {
                moveIsLegal = true;
                break;
            }
        }
        if (!moveIsLegal) {
            return null;
        }

        Point target = new Point(move.position() % this.getColumnSize(), move.position() / this.getRowSize());
        char opponent = move.value() == 'B' ? 'W' : 'B';
        Move[] moves = sortMovesFromCenter(
                getFlipsForPotentialMove(target, move.value(), opponent, makeBoardAGrid()), move);
        mostRecentlyFlippedPieces = moves;
        this.setBoard(move);                                                                    //place the move on the board
        for (Move m : moves) {
            this.setBoard(m);                                                                   //flip the correct pieces on the board
        }
        filledCells.add(new Point(move.position() % this.getRowSize(), move.position() / this.getColumnSize()));
        cachedBoard = makeBoardAGrid();
        nextTurn();
        if (getLegalMoves().length == 0) {                                                      //skip the players turn when there are no legal moves
            skipMyTurn();
            if (getLegalMoves().length > 0) {
                return GameState.TURN_SKIPPED;
            }
            //end the game when neither player has a legal move
            Score score = getScore();
            if (score.player1Score() == score.player2Score()) {
                return GameState.DRAW;
            }
            return GameState.WIN;
        }
        return GameState.NORMAL;
    }

    private void skipMyTurn(){
        //TODO: notify user that a turn has been skipped
        nextTurn();
    }

    /** Returns {@code 'B'} on turn 0 and {@code 'W'} otherwise. */
    public char getCurrentPlayer() {
        return this.getCurrentTurn() == 0 ? 'B' : 'W';
    }

    private char getOpponent(char currentPlayer){
        return currentPlayer == 'B' ? 'W' : 'B';
    }

    /** Counts the {@code 'B'} (player 1) and {@code 'W'} (player 2) pieces on the board. */
    public Score getScore(){
        int player1Score = 0, player2Score = 0;
        for (int count = 0; count < this.getRowSize() * this.getColumnSize(); count++) {
            if (this.getBoard()[count] == 'B') {
                player1Score += 1;
            }
            if (this.getBoard()[count] == 'W') {
                player2Score += 1;
            }
        }
        return new Score(player1Score, player2Score);
    }

    /**
     * Reports whether neither player has a legal move. The turn is advanced twice while
     * checking, which puts it back on the player it started on.
     */
    public boolean isGameOver(){
        Move[] legalMovesW = getLegalMoves();
        nextTurn();
        Move[] legalMovesB = getLegalMoves();
        nextTurn();
        return legalMovesW.length + legalMovesB.length == 0;
    }

    /** Returns 1 or 2 for the winning player, or 0 for a draw or an unfinished game. */
    public int getWinner(){
        if (!isGameOver()) {
            return 0;
        }
        Score score = getScore();
        if (score.player1Score() > score.player2Score()) {
            return 1;
        }
        else if (score.player1Score() < score.player2Score()) {
            return 2;
        }
        return 0;
    }

    /** Sorts {@code moves} in place by squared distance to {@code center} (for animation) and returns it. */
    private Move[] sortMovesFromCenter(Move[] moves, Move center) {
        int centerX = center.position()%this.getColumnSize();
        int centerY = center.position()/this.getRowSize();
        Arrays.sort(moves, (a, b) -> {
            int dxA = a.position()%this.getColumnSize() - centerX;
            int dyA = a.position()/this.getRowSize() - centerY;
            int dxB = b.position()%this.getColumnSize() - centerX;
            int dyB = b.position()/this.getRowSize() - centerY;

            int distA = dxA * dxA + dyA * dyA;
            int distB = dxB * dxB + dyB * dyB;

            return Integer.compare(distA, distB);
        });
        return moves;
    }

    /** Returns the pieces flipped by the last successful {@link #play(Move)}, nearest first. */
    public Move[] getMostRecentlyFlippedPieces() {
        return mostRecentlyFlippedPieces;
    }

    /** Encodes the board as integers: {@code 'B'} = 1, {@code 'W'} = -1, anything else = 0. */
    public int[] getBoardInt(){
        char[] input = getBoard();
        int[] result = new int[input.length];
        for (int i = 0; i < input.length; i++) {
            result[i] = switch (input[i]) {
                case 'W' -> -1;
                case 'B' -> 1;
                default -> 0;
            };
        }
        return result;
    }

    /** Converts a move's flat position into an (x, y) point. */
    public Point moveToPoint(Move move){
        return new Point(move.position()%this.getColumnSize(),move.position()/this.getRowSize());
    }

    /** Prints the cached grid, one row per line. */
    public void printBoard(){
        for (int row = 0; row < this.getRowSize(); row++) {
            IO.println(Arrays.toString(cachedBoard[row]));
        }
    }
}

View File

@@ -1,15 +0,0 @@
package org.toop.game.reversi;
import org.toop.game.AI;
import org.toop.game.records.Move;
/** Reversi player that picks uniformly at random among the legal moves. */
public final class ReversiAI extends AI<Reversi> {
    /**
     * @param game  game to choose a move for
     * @param depth not used by this implementation
     * @return a random legal move, or {@code null} when there is none
     */
    @Override
    public Move findBestMove(Reversi game, int depth) {
        Move[] moves = game.getLegalMoves();
        if (moves.length == 0) return null;
        // Math.random() is in [0, 1), so plain truncation is uniform over all indices.
        // The former "- .5f" made index 0 over-represented and the last index
        // under-represented.
        int index = (int) (Math.random() * moves.length);
        return moves[index];
    }
}

View File

@@ -1,31 +1,40 @@
/*//todo fix this mess
package org.toop.game.tictactoe;
import java.util.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import org.toop.framework.gameFramework.model.player.Player;
import org.toop.game.AI;
import org.toop.game.enumerators.GameState;
import org.toop.game.games.reversi.ReversiAIR;
import org.toop.game.games.reversi.ReversiR;
import org.toop.game.records.Move;
import org.toop.game.reversi.Reversi;
import org.toop.game.reversi.ReversiAI;
import org.toop.game.reversi.ReversiAIML;
import org.toop.game.reversi.ReversiAISimple;
import org.toop.game.games.reversi.ReversiAIML;
import org.toop.game.games.reversi.ReversiAISimple;
import static org.junit.jupiter.api.Assertions.*;
class ReversiTest {
private Reversi game;
private ReversiAI ai;
private ReversiR game;
private ReversiAIR ai;
private ReversiAIML aiml;
private ReversiAISimple aiSimple;
private AI<Reversi> player1;
private AI<Reversi> player2;
private AbstractAI<ReversiR> player1;
private AbstractAI<ReversiR> player2;
private Player[] players = new Player[2];
@BeforeEach
void setup() {
game = new Reversi();
ai = new ReversiAI();
game = new ReversiR(players);
ai = new ReversiAIR();
aiml = new ReversiAIML();
aiSimple = new ReversiAISimple();
@@ -231,7 +240,7 @@ class ReversiTest {
int draws = 0;
List<Integer> moves = new ArrayList<>();
for (int i = 0; i < totalGames; i++) {
game = new Reversi();
game = new ReversiR();
while (!game.isGameOver()) {
char curr = game.getCurrentPlayer();
Move move;
@@ -258,4 +267,6 @@ class ReversiTest {
}
IO.println("p1 winrate: " + p1wins + "/" + totalGames + " = " + (double) p1wins / totalGames + "\np2wins: " + p2wins + " draws: " + draws);
}
}
}
*/