package net.demilich.metastone.game.behaviour.mcts; import java.util.Random; class UctPolicy implements ITreePolicy { private static final double EPSILON = 1e-5; private static final Random random = new Random(); private static final double C = 1 / Math.sqrt(2); @Override public Node select(Node parent) { Node selected = null; double bestValue = Double.NEGATIVE_INFINITY; for (Node child : parent.getChildren()) { double uctValue = child.getVisits() == 0 ? 1000000 : child.getScore() / (double) child.getVisits() + C * Math.sqrt(Math.log(parent.getVisits()) / child.getVisits()) + random.nextDouble() * EPSILON; // small random number to break ties randomly in unexpanded nodes if (uctValue > bestValue) { selected = child; bestValue = uctValue; } } return selected; } }