package aima.test.core.unit.learning.reinforcement.agent; import java.util.Map; import org.junit.Assert; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; import aima.core.environment.cellworld.Cell; import aima.core.environment.cellworld.CellWorld; import aima.core.environment.cellworld.CellWorldAction; import aima.core.environment.cellworld.CellWorldFactory; import aima.core.learning.reinforcement.agent.QLearningAgent; import aima.core.learning.reinforcement.example.CellWorldEnvironment; import aima.core.probability.example.MDPFactory; import aima.core.util.JavaRandomizer; public class QLearningAgentTest extends ReinforcementLearningAgentTest { // private CellWorld<Double> cw = null; private CellWorldEnvironment cwe = null; private QLearningAgent<Cell<Double>, CellWorldAction> qla = null; @Before public void setUp() { cw = CellWorldFactory.createCellWorldForFig17_1(); cwe = new CellWorldEnvironment( cw.getCellAt(1, 1), cw.getCells(), MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw), new JavaRandomizer()); qla = new QLearningAgent<Cell<Double>, CellWorldAction>(MDPFactory .createActionsFunctionForFigure17_1(cw), CellWorldAction.None, 0.2, 1.0, 5, 2.0); cwe.addAgent(qla); } @Test public void test_Q_learning() { qla.reset(); cwe.executeTrials(100000); Map<Cell<Double>, Double> U = qla.getUtility(); Assert.assertNotNull(U.get(cw.getCellAt(1, 1))); // Note: // As the Q-Learning Agent is not using a fixed // policy it should with a reasonable number // of iterations observe and calculate an // approximate utility for all of the states. Assert.assertEquals(11, U.size()); // Note: Due to stochastic nature of environment, // will not test the individual utilities calculated // as this will take a fair amount of time. // Instead we will check if the RMS error in utility // for 1,1 is below a reasonable threshold. test_RMSeiu_for_1_1(qla, 20, 10000, 0.2); } // Note: Enable this test if you wish to generate tables for // creating figures, in a spreadsheet, of the learning // rate of the agent. @Ignore @Test public void test_Q_learning_rate() { test_utility_learning_rates(qla, 20, 10000, 500, 20); } }