6 Commits

Author SHA1 Message Date
Ticho Hidding
5501c3893f bitboard merge 2025-12-15 10:28:33 +01:00
Ticho Hidding
ffdec38e5d Merge branch 'Development' into ReversiML
# Conflicts:
#	game/src/main/java/org/toop/game/games/reversi/ReversiR.java
2025-12-13 15:04:40 +01:00
Ticho Hidding
03dc6130e2 merge commit 2025-12-08 14:55:32 +01:00
Ticho Hidding
ca7f9e8ecf Merge branch 'Development' into ReversiML
# Conflicts:
#	app/src/main/java/org/toop/Main.java
#	app/src/main/java/org/toop/app/game/ReversiGame.java
#	game/pom.xml
#	game/src/main/java/org/toop/game/reversi/Reversi.java
#	game/src/main/java/org/toop/game/reversi/ReversiAI.java
#	game/src/test/java/org/toop/game/tictactoe/ReversiTest.java
2025-12-08 11:58:32 +01:00
Ticho Hidding
f6d90ed439 added some useful testing methods.
made training slightly better.
2025-12-08 11:36:31 +01:00
Ticho Hidding
7e913ff50f Machine learning for reversi.
performance improvements for reversi.getlegalmoves
2025-12-02 10:59:33 +01:00
9 changed files with 701 additions and 1 deletions

View File

@@ -16,6 +16,7 @@ import org.toop.framework.audio.*;
import org.toop.framework.audio.events.AudioEvents;
import org.toop.framework.eventbus.EventFlow;
import org.toop.framework.eventbus.GlobalEventBus;
import org.toop.game.machinelearning.NeuralNetwork;
import org.toop.framework.networking.NetworkingClientEventListener;
import org.toop.framework.networking.NetworkingClientManager;
import org.toop.framework.resource.ResourceLoader;
@@ -138,6 +139,12 @@ public final class App extends Application {
stage.show();
//startML();
}
/**
 * Creates a {@link NeuralNetwork} and invokes its {@code init()} method.
 *
 * <p>NOTE(review): {@code init()} presumably runs a complete training session and
 * blocks until it finishes — confirm before calling this from the UI thread.
 */
private void startML() {
    new NeuralNetwork().init();
}
private void setKeybinds(StackPane root) {

View File

@@ -146,6 +146,12 @@
<artifactId>error_prone_annotations</artifactId>
<version>2.42.0</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-nn</artifactId>
<version>1.0.0-M2.1</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>

View File

@@ -105,6 +105,16 @@
<version>0.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native-platform</artifactId>
<version>1.0.0-M2.1</version>
</dependency>
</dependencies>

View File

@@ -167,4 +167,13 @@ public class BitboardReversi extends BitboardGame<BitboardReversi> {
/**
 * Shifts {@code bit} by a signed distance and masks the result.
 *
 * @param bit   the value to shift
 * @param shift a positive value shifts left by {@code shift}; zero or a negative value
 *              performs an unsigned right shift by {@code -shift}
 * @param mask  AND-ed onto the shifted value
 * @return the shifted value restricted to {@code mask}
 */
private long shift(long bit, int shift, long mask) {
    final long moved;
    if (shift > 0) {
        moved = bit << shift;
    } else {
        moved = bit >>> -shift;
    }
    return moved & mask;
}
/**
 * Reports whether the game has ended because neither side has a legal move.
 *
 * <p>The check runs entirely on a deep copy, so calling this method never changes
 * whose turn it is in this game.
 *
 * @return {@code true} if the side to move has no legal move and, after advancing the
 *         copy's turn, the other side has none either; {@code false} otherwise
 */
public boolean isGameOver() {
    BitboardReversi copy = this.deepCopy();
    if (copy.getLegalMoves() != 0) {
        return false;
    }
    // Advance the turn on the COPY (not on this game) so the second check looks at
    // the other side's moves; presumably nextTurn() hands the move to the opponent.
    copy.nextTurn();
    return copy.getLegalMoves() == 0;
}
}

View File

@@ -0,0 +1,228 @@
package org.toop.game.machinelearning;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.toop.framework.gameFramework.GameState;
import org.toop.framework.gameFramework.model.game.PlayResult;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import org.toop.framework.gameFramework.model.player.Player;
import org.toop.game.games.reversi.BitboardReversi;
import org.toop.game.players.ArtificialPlayer;
import org.toop.game.players.ai.MiniMaxAI;
import org.toop.game.players.ai.RandomAI;
import org.toop.game.players.ai.ReversiAIML;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static java.lang.Math.abs;
import static java.lang.Math.random;
public class NeuralNetwork {
private MultiLayerConfiguration conf;
private MultiLayerNetwork model;
private AbstractAI<BitboardReversi> opponentAI;
private AbstractAI<BitboardReversi> opponentMM = new MiniMaxAI<>(6);
private AbstractAI<BitboardReversi> opponentRand = new RandomAI<>();
private AbstractAI<BitboardReversi> opponentAIML = new ReversiAIML<>();
private Player[] playerSet = new Player[4];
public NeuralNetwork() {}
public void init(){
initPlayers();
conf = new NeuralNetConfiguration.Builder()
.updater(new Adam(0.001))
.weightInit(WeightInit.XAVIER) //todo understand
.list()
.layer(new DenseLayer.Builder()
.nIn(64)
.nOut(128)
.activation(Activation.RELU)
.build())
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.nIn(128)
.nOut(64)
.activation(Activation.SOFTMAX)
.build())
.build();
model = new MultiLayerNetwork(conf);
IO.println(model.params());
loadModel();
IO.println(model.params());
model.init();
IO.println(model.summary());
model.setLearningRate(0.0003);
trainingLoop();
saveModel();
}
public void initPlayers(){
playerSet[0] = new ArtificialPlayer<>(new MiniMaxAI<BitboardReversi>(6),"MiniMaxAI");
playerSet[1] = new ArtificialPlayer<>(new RandomAI<BitboardReversi>(),"RandomAI");
playerSet[2] = new ArtificialPlayer<>(new ReversiAIML<BitboardReversi>(),"MachineLearningAI");
}
public void saveModel(){
File modelFile = new File("reversi-model.zip");
try {
ModelSerializer.writeModel(model, modelFile, true);
}catch (Exception e){
e.printStackTrace();
}
}
public void loadModel(){
File modelFile = new File("reversi-model.zip");
try {
model = ModelSerializer.restoreMultiLayerNetwork(modelFile);
} catch (IOException e) {
e.printStackTrace();
}
}
public void trainingLoop(){
int totalGames = 5000;
double epsilon = 0.05;
long start = System.nanoTime();
for (int game = 0; game<totalGames; game++){
char modelPlayer = random()<0.5?'B':'W';
BitboardReversi reversi = new BitboardReversi(new Player[2]);
opponentAI = getOpponentAI();
List<StateAction> gameHistory = new ArrayList<>();
PlayResult state = new PlayResult(GameState.NORMAL,reversi.getCurrentTurn());
double reward = 0;
while (state.state() != GameState.DRAW && state.state() != GameState.WIN){
int curr = reversi.getCurrentTurn();
long move;
if (curr == modelPlayer) {
long[] input = reversi.getBoard();
if (Math.random() < epsilon) {
long moves = reversi.getLegalMoves();
move = (long) (Math.random() * Long.bitCount(moves) - .5f);
} else {
INDArray boardInput = Nd4j.create(new long[][]{input});
INDArray prediction = model.output(boardInput);
int location = pickLegalMove(prediction, reversi);
gameHistory.add(new StateAction(input, location));
move = location;
}
}else{
move = opponentAI.getMove(reversi);
}
state = reversi.play(move);
}
//IO.println(model.params());
BitboardReversi.Score score = reversi.getScore();
int scoreDif = abs(score.black() - score.white());
if (score.black() > score.white()){
reward = 1 + ((scoreDif / 64.0) * 0.5);
}else if (score.black() < score.white()){
reward = -1 - ((scoreDif / 64.0) * 0.5);
}else{
reward = 0;
}
if (modelPlayer == 'W'){
reward = -reward;
}
for (StateAction step : gameHistory){
trainFromHistory(step, reward);
}
//IO.println("Wr: " + (double)p1wins/(game+1) + " draws: " + draws);
if(game % 100 == 0){
IO.println("Completed game " + game + " | Reward: " + reward);
//IO.println(Arrays.toString(reversi.getBoardDouble()));
}
}
long end = System.nanoTime();
IO.println((end-start));
}
private int pickLegalMove(INDArray prediction, BitboardReversi reversi) {
double[] logits = prediction.toDoubleVector();
long legalMoves = reversi.getLegalMoves();
if (legalMoves == 0L) {
return -1;
}
if (Math.random() < 0.01) {
int randomIndex = (int) (Math.random() * Long.bitCount(legalMoves));
long moves = legalMoves;
for (int i = 0; i < randomIndex; i++) {
moves &= moves - 1;
}
return Long.numberOfTrailingZeros(moves);
}
int bestMove = -1;
double bestVal = Double.NEGATIVE_INFINITY;
long moves = legalMoves;
while (moves != 0L) {
int move = Long.numberOfTrailingZeros(moves);
double value = logits[move];
if (value > bestVal) {
bestVal = value;
bestMove = move;
}
moves &= moves - 1;
}
return bestMove;
}
private AbstractAI<BitboardReversi> getOpponentAI(){
return switch ((int) (Math.random() * 4)) {
case 0 -> opponentRand;
case 1 -> opponentMM;
case 2 -> opponentAIML;
default -> opponentRand;
};
}
private void trainFromHistory(StateAction step, double reward){
double[] output = new double[64];
output[step.action] = reward;
DataSet ds = new DataSet(
Nd4j.create(new long[][] { step.state }),
Nd4j.create(new double[][] { output })
);
model.fit(ds);
}
}

View File

@@ -0,0 +1,10 @@
package org.toop.game.machinelearning;
/**
 * One recorded training step: a snapshot of the board and the move chosen on it.
 *
 * <p>The board array is copied on construction, so later changes to the caller's array
 * do not alter the recorded state.
 */
public class StateAction {
    /** Snapshot of the board at the time of the move. */
    final long[] state;
    /** The move that was chosen for {@link #state}. */
    final int action;

    /**
     * @param state  board to record; copied, must not be {@code null}
     * @param action the move chosen on that board
     * @throws NullPointerException if {@code state} is {@code null}
     */
    public StateAction(long[] state, int action) {
        if (state == null) {
            throw new NullPointerException("state");
        }
        this.state = state.clone();
        this.action = action;
    }
}

View File

@@ -0,0 +1,80 @@
package org.toop.game.players.ai;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.toop.framework.gameFramework.model.game.TurnBasedGame;
import org.toop.framework.gameFramework.model.player.AI;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import org.toop.game.games.reversi.BitboardReversi;
import java.io.IOException;
import java.io.InputStream;
import static java.lang.Math.random;
/**
 * AI that chooses Reversi moves with a neural network loaded from the classpath resource
 * {@code /reversi-model.zip}.
 *
 * <p>Boards are fed to the network as 64 values: square {@code i} is {@code +1.0} if bit
 * {@code i} of {@code board[0]} is set, {@code -1.0} if bit {@code i} of {@code board[1]}
 * is set, and {@code 0.0} otherwise.
 * NOTE(review): this must match the encoding the model was trained with — confirm.
 *
 * @param <T> the game type; only {@link BitboardReversi} instances are supported at runtime
 */
public class ReversiAIML<T extends TurnBasedGame<T>> extends AbstractAI<T> {
    private static final String MODEL_RESOURCE = "/reversi-model.zip";
    private static final int SQUARES = 64;

    final MultiLayerNetwork model;

    /**
     * Loads the network from the classpath.
     *
     * @throws IllegalStateException if the resource is missing or cannot be read
     */
    public ReversiAIML() {
        try (InputStream is = getClass().getResourceAsStream(MODEL_RESOURCE)) {
            if (is == null) {
                throw new IllegalStateException("Model resource not found on classpath: " + MODEL_RESOURCE);
            }
            model = ModelSerializer.restoreMultiLayerNetwork(is);
        } catch (IOException e) {
            // Fail here with the cause attached instead of leaving model null and
            // hitting a NullPointerException on the first getMove call.
            throw new IllegalStateException("Could not load model resource " + MODEL_RESOURCE, e);
        }
    }

    /** Converts the two bitboards into a 1x64 network input. */
    private static INDArray encodeBoard(long[] board) {
        double[] cells = new double[SQUARES];
        for (int square = 0; square < SQUARES; square++) {
            long bit = 1L << square;
            if ((board[0] & bit) != 0) {
                cells[square] = 1.0;
            } else if ((board[1] & bit) != 0) {
                cells[square] = -1.0;
            }
        }
        return Nd4j.create(new double[][] {cells});
    }

    /**
     * Picks the legal square with the highest network output; with 1% probability a
     * uniformly random legal square is returned instead.
     *
     * @return the chosen square index, or -1 when there is no legal move
     */
    private int pickLegalMove(INDArray prediction, BitboardReversi reversi) {
        long legalMoves = reversi.getLegalMoves();
        if (legalMoves == 0L) {
            return -1;
        }
        if (Math.random() < 0.01) {
            int skip = (int) (Math.random() * Long.bitCount(legalMoves));
            long remaining = legalMoves;
            for (int i = 0; i < skip; i++) {
                remaining &= remaining - 1; // clear the lowest set bit
            }
            return Long.numberOfTrailingZeros(remaining);
        }
        double[] scores = prediction.toDoubleVector();
        int bestMove = -1;
        double bestVal = Double.NEGATIVE_INFINITY;
        for (long moves = legalMoves; moves != 0L; moves &= moves - 1) {
            int move = Long.numberOfTrailingZeros(moves);
            if (scores[move] > bestVal) {
                bestVal = scores[move];
                bestMove = move;
            }
        }
        return bestMove;
    }

    /**
     * Returns the network's move for the side to play.
     *
     * @param game the current game; must be a {@link BitboardReversi}
     * @return the chosen square index, or -1 when there is no legal move
     * @throws IllegalArgumentException if {@code game} is not a {@link BitboardReversi}
     */
    @Override
    public long getMove(T game) {
        if (!(game instanceof BitboardReversi reversi)) {
            throw new IllegalArgumentException(
                    "ReversiAIML only supports BitboardReversi, got " + game.getClass().getName());
        }
        INDArray prediction = model.output(encodeBoard(game.getBoard()));
        return pickLegalMove(prediction, reversi);
    }

    /** Returns a new instance that loads its own copy of the model from the classpath. */
    @Override
    public ReversiAIML<T> deepCopy() {
        return new ReversiAIML<>();
    }
}

View File

@@ -0,0 +1,272 @@
/* TODO: this legacy test suite is disabled (fully commented out); port it to the current game API or delete it.
package org.toop.game.tictactoe;
import java.util.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.toop.framework.gameFramework.model.player.AbstractAI;
import org.toop.framework.gameFramework.model.player.Player;
import org.toop.game.AI;
import org.toop.game.enumerators.GameState;
import org.toop.game.games.reversi.ReversiAIR;
import org.toop.game.games.reversi.ReversiR;
import org.toop.game.records.Move;
import org.toop.game.reversi.Reversi;
import org.toop.game.reversi.ReversiAI;
import org.toop.game.players.ai.ReversiAIML;
import org.toop.game.games.reversi.ReversiAISimple;
import static org.junit.jupiter.api.Assertions.*;
class ReversiTest {
private ReversiR game;
private ReversiAIR ai;
private ReversiAIML aiml;
private ReversiAISimple aiSimple;
private AbstractAI<ReversiR> player1;
private AbstractAI<ReversiR> player2;
private Player[] players = new Player[2];
@BeforeEach
void setup() {
game = new ReversiR(players);
ai = new ReversiAIR();
aiml = new ReversiAIML();
aiSimple = new ReversiAISimple();
}
@Test
void testCorrectStartPiecesPlaced() {
assertNotNull(game);
assertEquals('W', game.getBoard()[27]);
assertEquals('B', game.getBoard()[28]);
assertEquals('B', game.getBoard()[35]);
assertEquals('W', game.getBoard()[36]);
}
@Test
void testGetLegalMovesAtStart() {
Move[] moves = game.getLegalMoves();
List<Move> expectedMoves = List.of(
new Move(19, 'B'),
new Move(26, 'B'),
new Move(37, 'B'),
new Move(44, 'B')
);
assertNotNull(moves);
assertTrue(moves.length > 0);
assertMovesMatchIgnoreOrder(expectedMoves, Arrays.asList(moves));
}
private void assertMovesMatchIgnoreOrder(List<Move> expected, List<Move> actual) {
assertEquals(expected.size(), actual.size());
for (int i = 0; i < expected.size(); i++) {
assertTrue(actual.contains(expected.get(i)));
assertTrue(expected.contains(actual.get(i)));
}
}
@Test
void testMakeValidMoveFlipsPieces() {
game.play(new Move(19, 'B'));
assertEquals('B', game.getBoard()[19]);
assertEquals('B', game.getBoard()[27], "Piece should have flipped to B");
}
@Test
void testMakeInvalidMoveDoesNothing() {
char[] before = game.getBoard().clone();
game.play(new Move(0, 'B'));
assertArrayEquals(before, game.getBoard(), "Board should not change on invalid move");
}
@Test
void testTurnSwitchesAfterValidMove() {
char current = game.getCurrentPlayer();
game.play(game.getLegalMoves()[0]);
assertNotEquals(current, game.getCurrentPlayer(), "Player turn should switch after a valid move");
}
@Test
void testCountScoreCorrectlyAtStart() {
long start = System.nanoTime();
Reversi.Score score = game.getScore();
assertEquals(2, score.player1Score()); // Black
assertEquals(2, score.player2Score()); // White
long end = System.nanoTime();
IO.println((end - start));
}
@Test
void zLegalMovesInCertainPosition() {
game.play(new Move(19, 'B'));
game.play(new Move(20, 'W'));
Move[] moves = game.getLegalMoves();
List<Move> expectedMoves = List.of(
new Move(13, 'B'),
new Move(21, 'B'),
new Move(29, 'B'),
new Move(37, 'B'),
new Move(45, 'B'));
assertNotNull(moves);
assertTrue(moves.length > 0);
IO.println(Arrays.toString(moves));
assertMovesMatchIgnoreOrder(expectedMoves, Arrays.asList(moves));
}
@Test
void testCountScoreCorrectlyAtEnd() {
for (int i = 0; i < 1; i++) {
game = new Reversi();
Move[] legalMoves = game.getLegalMoves();
while (legalMoves.length > 0) {
game.play(legalMoves[(int) (Math.random() * legalMoves.length)]);
legalMoves = game.getLegalMoves();
}
Reversi.Score score = game.getScore();
IO.println(score.player1Score());
IO.println(score.player2Score());
for (int r = 0; r < game.getRowSize(); r++) {
char[] row = Arrays.copyOfRange(game.getBoard(), r * game.getColumnSize(), (r + 1) * game.getColumnSize());
IO.println(Arrays.toString(row));
}
}
}
@Test
void testPlayerMustSkipTurnIfNoValidMoves() {
game.play(new Move(19, 'B'));
game.play(new Move(34, 'W'));
game.play(new Move(45, 'B'));
game.play(new Move(11, 'W'));
game.play(new Move(42, 'B'));
game.play(new Move(54, 'W'));
game.play(new Move(37, 'B'));
game.play(new Move(46, 'W'));
game.play(new Move(63, 'B'));
game.play(new Move(62, 'W'));
game.play(new Move(29, 'B'));
game.play(new Move(50, 'W'));
game.play(new Move(55, 'B'));
game.play(new Move(30, 'W'));
game.play(new Move(53, 'B'));
game.play(new Move(38, 'W'));
game.play(new Move(61, 'B'));
game.play(new Move(52, 'W'));
game.play(new Move(51, 'B'));
game.play(new Move(60, 'W'));
game.play(new Move(59, 'B'));
assertEquals('B', game.getCurrentPlayer());
game.play(ai.findBestMove(game, 5));
game.play(ai.findBestMove(game, 5));
}
@Test
void testGameShouldEndIfNoValidMoves() {
//European Grand Prix Ghent 2017: Replay Hassan - Verstuyft J. (3-17)
game.play(new Move(19, 'B'));
game.play(new Move(20, 'W'));
game.play(new Move(29, 'B'));
game.play(new Move(22, 'W'));
game.play(new Move(21, 'B'));
game.play(new Move(34, 'W'));
game.play(new Move(23, 'B'));
game.play(new Move(13, 'W'));
game.play(new Move(26, 'B'));
game.play(new Move(18, 'W'));
game.play(new Move(12, 'B'));
game.play(new Move(4, 'W'));
game.play(new Move(17, 'B'));
game.play(new Move(31, 'W'));
GameState stateTurn15 = game.play(new Move(39, 'B'));
assertEquals(GameState.NORMAL, stateTurn15);
GameState stateTurn16 = game.play(new Move(16, 'W'));
assertEquals(GameState.WIN, stateTurn16);
GameState stateTurn17 = game.play(new Move(5, 'B'));
assertNull(stateTurn17);
Reversi.Score score = game.getScore();
assertEquals(3, score.player1Score());
assertEquals(17, score.player2Score());
}
@Test
void testAISelectsLegalMove() {
Move move = ai.findBestMove(game, 4);
assertNotNull(move);
assertTrue(containsMove(game.getLegalMoves(), move), "AI should always choose a legal move");
}
private boolean containsMove(Move[] moves, Move move) {
for (Move m : moves) {
if (m.equals(move)) return true;
}
return false;
}
@Test
void testAis() {
player1 = aiml;
player2 = ai;
testAIvsAIML();
player2 = aiSimple;
testAIvsAIML();
player1 = ai;
testAIvsAIML();
player2 = aiml;
testAIvsAIML();
player1 = aiml;
testAIvsAIML();
player1 = aiSimple;
testAIvsAIML();
}
@Test
void testAIvsAIML() {
if(player1 == null || player2 == null) {
player1 = aiml;
player2 = ai;
}
int totalGames = 2000;
IO.println("Testing... " + player1.getClass().getSimpleName() + " vs " + player2.getClass().getSimpleName() + " for " + totalGames + " games");
int p1wins = 0;
int p2wins = 0;
int draws = 0;
List<Integer> moves = new ArrayList<>();
for (int i = 0; i < totalGames; i++) {
game = new ReversiR();
while (!game.isGameOver()) {
char curr = game.getCurrentPlayer();
Move move;
if (curr == 'B') {
move = player1.findBestMove(game, 5);
} else {
move = player2.findBestMove(game, 5);
}
if (i%500 == 0) moves.add(move.position());
game.play(move);
}
if (i%500 == 0) {
IO.println(moves);
moves.clear();
}
int winner = game.getWinner();
if (winner == 1) {
p1wins++;
} else if (winner == 2) {
p2wins++;
} else {
draws++;
}
}
IO.println("p1 winrate: " + p1wins + "/" + totalGames + " = " + (double) p1wins / totalGames + "\np2wins: " + p2wins + " draws: " + draws);
}
}
*/

View File

@@ -0,0 +1,78 @@
package org.toop.game.tictactoe;
import java.util.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.toop.framework.gameFramework.model.player.Player;
import org.toop.game.games.reversi.BitboardReversi;
import org.toop.game.players.ArtificialPlayer;
import org.toop.game.players.ai.MiniMaxAI;
import org.toop.game.players.ai.RandomAI;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link BitboardReversi}: the opening position and a complete game played
 * between a random AI and a MiniMax AI.
 */
public class TestReversi {
    private BitboardReversi game;
    private Player[] players;

    /** Creates a random-vs-MiniMax player pair and a fresh game before every test. */
    @BeforeEach
    void setup() {
        players = new Player[] {
            new ArtificialPlayer<BitboardReversi>(new RandomAI<BitboardReversi>(), "randomAI"),
            new ArtificialPlayer<BitboardReversi>(new MiniMaxAI<BitboardReversi>(10), "miniMaxAI")
        };
        game = new BitboardReversi(players);
    }

    /** The four centre squares must hold the standard opening stones. */
    @Test
    void testCorrectStartPiecesPlaced() {
        assertNotNull(game);
        long[] bitboards = game.getBoard();
        long black = bitboards[0];
        IO.println(Long.toBinaryString(black));
        long white = bitboards[1];
        IO.println(Long.toBinaryString(white));
        // Shift the wanted square down to bit 0 and test that single bit.
        assertEquals(1L, (white >>> 27) & 1L);
        assertEquals(1L, (black >>> 28) & 1L);
        assertEquals(1L, (black >>> 35) & 1L);
        assertEquals(1L, (white >>> 36) & 1L);
    }

    /** Plays full games, printing per-move and total timings, and checks every game has a result. */
    @Test
    void testPlayGames() {
        final int totalGames = 1;
        final long start = System.nanoTime();
        int p1wins = 0;
        int p2wins = 0;
        int draws = 0;
        for (int played = 0; played < totalGames; played++) {
            game = new BitboardReversi(players);
            while (!game.isGameOver()) {
                long moveStart = System.nanoTime();
                int mover = game.getCurrentTurn();
                // Give the player a copy so it cannot alter the real game.
                long move = players[mover].getMove(game.deepCopy());
                game.play(move);
                IO.println(System.nanoTime() - moveStart);
            }
            switch (game.getWinner()) {
                case 0 -> p1wins++;
                case 1 -> p2wins++;
                case -1 -> draws++;
                default -> { }
            }
        }
        System.out.println(System.nanoTime() - start);
        IO.println(p1wins + " " + p2wins + " " + draws);
        assertEquals(totalGames, p1wins + p2wins + draws);
        IO.println("p1 wr: " + p1wins + "/" + totalGames + " = " + (double) p1wins / totalGames);
    }
}