MDPFactory.java example

Explorer
aima-java-master
- aima-java-AIMA3e
package aima.core.probability.example;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.probability.mdp.ActionsFunction;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.RewardFunction;
import aima.core.probability.mdp.TransitionProbabilityFunction;
import aima.core.probability.mdp.impl.MDP;

/**
 * 
 * @author Ciaran O'Reilly
 * @author Ravi Mohan
 */
public class MDPFactory {

	/**
	 * Constructs an MDP that can be used to generate the utility values
	 * detailed in Fig 17.3.
	 * 
	 * @param cw
	 *            the cell world from figure 17.1.
	 * @return an MDP that can be used to generate the utility values detailed
	 *         in Fig 17.3.
	 */
	public static MarkovDecisionProcess<Cell<Double>, CellWorldAction> createMDPForFigure17_3(
			final CellWorld<Double> cw) {

		return new MDP<Cell<Double>, CellWorldAction>(cw.getCells(),
				cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
				createTransitionProbabilityFunctionForFigure17_1(cw),
				createRewardFunctionForFigure17_1());
	}

	/**
	 * Returns the allowed actions from a specified cell within the cell world
	 * described in Fig 17.1.
	 * 
	 * @param cw
	 *            the cell world from figure 17.1.
	 * @return the set of actions allowed at a particular cell. This set will be
	 *         empty if at a terminal state.
	 */
	public static ActionsFunction<Cell<Double>, CellWorldAction> createActionsFunctionForFigure17_1(
			final CellWorld<Double> cw) {
		final Set<Cell<Double>> terminals = new HashSet<Cell<Double>>();
		terminals.add(cw.getCellAt(4, 3));
		terminals.add(cw.getCellAt(4, 2));

		ActionsFunction<Cell<Double>, CellWorldAction> af = new ActionsFunction<Cell<Double>, CellWorldAction>() {

			@Override
			public Set<CellWorldAction> actions(Cell<Double> s) {
				// All actions can be performed in each cell
				// (except terminal states)
				if (terminals.contains(s)) {
					return Collections.emptySet();
				}
				return CellWorldAction.actions();
			}
		};
		return af;
	}

	/**
	 * Figure 17.1 (b) Illustration of the transition model of the environment:
	 * the 'intended' outcome occurs with probability 0.8, but with probability
	 * 0.2 the agent moves at right angles to the intended direction. A
	 * collision with a wall results in no movement.
	 * 
	 * @param cw
	 *            the cell world from figure 17.1.
	 * @return the transition probability function as described in figure 17.1.
	 */
	public static TransitionProbabilityFunction<Cell<Double>, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(
			final CellWorld<Double> cw) {
		TransitionProbabilityFunction<Cell<Double>, CellWorldAction> tf = new TransitionProbabilityFunction<Cell<Double>, CellWorldAction>() {
			private double[] distribution = new double[] { 0.8, 0.1, 0.1 };

			@Override
			public double probability(Cell<Double> sDelta, Cell<Double> s,
					CellWorldAction a) {
				double prob = 0;

				List<Cell<Double>> outcomes = possibleOutcomes(s, a);
				for (int i = 0; i < outcomes.size(); i++) {
					if (sDelta.equals(outcomes.get(i))) {
						// Note: You have to sum the matches to
						// sDelta as the different actions
						// could have the same effect (i.e.
						// staying in place due to there being
						// no adjacent cells), which increases
						// the probability of the transition for
						// that state.
						prob += distribution[i];
					}
				}

				return prob;
			}
			
			private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
					CellWorldAction a) {
				// There can be three possible outcomes for the planned action
				List<Cell<Double>> outcomes = new ArrayList<Cell<Double>>();

				outcomes.add(cw.result(c, a));
				outcomes.add(cw.result(c, a.getFirstRightAngledAction()));
				outcomes.add(cw.result(c, a.getSecondRightAngledAction()));

				return outcomes;
			}
		};

		return tf;
	}

	/**
	 * 
	 * @return the reward function which takes the content of the cell as being
	 *         the reward value.
	 */
	public static RewardFunction<Cell<Double>> createRewardFunctionForFigure17_1() {
		RewardFunction<Cell<Double>> rf = new RewardFunction<Cell<Double>>() {
			@Override
			public double reward(Cell<Double> s) {
				return s.getContent();
			}
		};
		return rf;
	}
}