package aima.test.core.unit.learning.reinforcement.agent;

import java.util.HashMap;
import java.util.Map;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.environment.cellworld.CellWorldFactory;
import aima.core.learning.reinforcement.agent.PassiveTDAgent;
import aima.core.learning.reinforcement.example.CellWorldEnvironment;
import aima.core.probability.example.MDPFactory;
import aima.core.util.JavaRandomizer;

public class PassiveTDAgentTest extends ReinforcementLearningAgentTest {
    //
    private CellWorld<Double> cw = null;
    private CellWorldEnvironment cwe = null;
    private PassiveTDAgent<Cell<Double>, CellWorldAction> ptda = null;

    @Before
    public void setUp() {
        cw = CellWorldFactory.createCellWorldForFig17_1();
        cwe = new CellWorldEnvironment(
                cw.getCellAt(1, 1),
                cw.getCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                new JavaRandomizer());

        // Fixed policy for the 4x3 cell world of Figure 17.1 (the terminal
        // states (4,2) and (4,3) have no action, and (2,2) is a wall).
        Map<Cell<Double>, CellWorldAction> fixedPolicy = new HashMap<Cell<Double>, CellWorldAction>();
        fixedPolicy.put(cw.getCellAt(1, 1), CellWorldAction.Up);
        fixedPolicy.put(cw.getCellAt(1, 2), CellWorldAction.Up);
        fixedPolicy.put(cw.getCellAt(1, 3), CellWorldAction.Right);
        fixedPolicy.put(cw.getCellAt(2, 1), CellWorldAction.Left);
        fixedPolicy.put(cw.getCellAt(2, 3), CellWorldAction.Right);
        fixedPolicy.put(cw.getCellAt(3, 1), CellWorldAction.Left);
        fixedPolicy.put(cw.getCellAt(3, 2), CellWorldAction.Up);
        fixedPolicy.put(cw.getCellAt(3, 3), CellWorldAction.Right);
        fixedPolicy.put(cw.getCellAt(4, 1), CellWorldAction.Left);

        // Learning rate alpha = 0.2, discount factor gamma = 1.0.
        ptda = new PassiveTDAgent<Cell<Double>, CellWorldAction>(fixedPolicy,
                0.2, 1.0);

        cwe.addAgent(ptda);
    }

    @Test
    public void test_TD_learning_fig21_1() {
        ptda.reset();
        cwe.executeTrials(10000);

        Map<Cell<Double>, Double> U = ptda.getUtility();

        Assert.assertNotNull(U.get(cw.getCellAt(1, 1)));

        // Note: cells (3,1) and (4,1) are not reachable when starting
        // at (1,1) under this policy and the default transition model
        // (i.e. 80% intended, 10% each right angle from intended).
        Assert.assertNull(U.get(cw.getCellAt(3, 1)));
        Assert.assertNull(U.get(cw.getCellAt(4, 1)));
        // Utilities are expected only for the 9 reachable cells
        // (11 non-wall cells minus the 2 unreachable ones above).
        Assert.assertEquals(9, U.size());

        // Note: Due to the stochastic nature of the environment, the
        // individual utilities calculated are not tested here, as that
        // would take a fair amount of time. Instead, check whether the
        // RMS error in the utility for (1,1) is below a reasonable
        // threshold.
        test_RMSeiu_for_1_1(ptda, 20, 1000, 0.07);
    }

    // Note: Enable this test if you wish to generate tables for
    // creating figures, in a spreadsheet, of the learning
    // rate of the agent.
    @Ignore
    @Test
    public void test_TD_learning_rate_fig21_5() {
        test_utility_learning_rates(ptda, 20, 500, 100, 1);
    }
}
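
// What follows is a minimal illustrative sketch, NOT part of aima-core: it
// shows the temporal-difference update rule that passive TD learning is
// built around, U(s) <- U(s) + alpha * (r + gamma * U(s') - U(s)).
// The class name TDUpdateSketch and its fields are assumptions made for
// this example only; the book's version (AIMA3e Fig. 21.4) additionally
// uses a learning rate alpha(Ns[s]) that decreases with the number of
// visits to s, rather than the fixed alpha used here.
class TDUpdateSketch {
    private final Map<Cell<Double>, Double> utilities = new HashMap<Cell<Double>, Double>();
    private final double alpha; // learning rate, e.g. 0.2 as in setUp() above
    private final double gamma; // discount factor, e.g. 1.0 as in setUp() above

    TDUpdateSketch(double alpha, double gamma) {
        this.alpha = alpha;
        this.gamma = gamma;
    }

    // Apply one TD update for the observed transition s --(r)--> sDelta.
    void update(Cell<Double> s, double r, Cell<Double> sDelta) {
        double us = utilities.containsKey(s) ? utilities.get(s) : 0.0;
        double usDelta = utilities.containsKey(sDelta) ? utilities.get(sDelta) : 0.0;
        // Move U(s) a fraction alpha toward the one-step sample r + gamma*U(s').
        utilities.put(s, us + alpha * (r + gamma * usDelta - us));
    }
}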