package aima.core.probability.example;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import aima.core.environment.cellworld.Cell;
import aima.core.environment.cellworld.CellWorld;
import aima.core.environment.cellworld.CellWorldAction;
import aima.core.probability.mdp.ActionsFunction;
import aima.core.probability.mdp.MarkovDecisionProcess;
import aima.core.probability.mdp.RewardFunction;
import aima.core.probability.mdp.TransitionProbabilityFunction;
import aima.core.probability.mdp.impl.MDP;
/**
*
* @author Ciaran O'Reilly
* @author Ravi Mohan
*/
public class MDPFactory {

	/**
	 * Constructs an MDP that can be used to generate the utility values
	 * detailed in Fig 17.3.
	 *
	 * @param cw
	 *            the cell world from figure 17.1.
	 * @return an MDP that can be used to generate the utility values detailed
	 *         in Fig 17.3.
	 */
	public static MarkovDecisionProcess<Cell<Double>, CellWorldAction> createMDPForFigure17_3(
			final CellWorld<Double> cw) {
		// The agent starts in cell (1, 1); the actions, transition model and
		// reward function are the ones described for figure 17.1.
		return new MDP<Cell<Double>, CellWorldAction>(cw.getCells(),
				cw.getCellAt(1, 1), createActionsFunctionForFigure17_1(cw),
				createTransitionProbabilityFunctionForFigure17_1(cw),
				createRewardFunctionForFigure17_1());
	}

	/**
	 * Returns the allowed actions from a specified cell within the cell world
	 * described in Fig 17.1.
	 *
	 * @param cw
	 *            the cell world from figure 17.1.
	 * @return the set of actions allowed at a particular cell. This set will be
	 *         empty if at a terminal state.
	 */
	public static ActionsFunction<Cell<Double>, CellWorldAction> createActionsFunctionForFigure17_1(
			final CellWorld<Double> cw) {
		// The two terminal states of figure 17.1: the +1 cell at (4, 3) and
		// the -1 cell at (4, 2). No actions are available from either.
		final Set<Cell<Double>> terminals = new HashSet<Cell<Double>>();
		terminals.add(cw.getCellAt(4, 3));
		terminals.add(cw.getCellAt(4, 2));

		ActionsFunction<Cell<Double>, CellWorldAction> af = new ActionsFunction<Cell<Double>, CellWorldAction>() {
			@Override
			public Set<CellWorldAction> actions(Cell<Double> s) {
				// All actions can be performed in each cell
				// (except terminal states)
				if (terminals.contains(s)) {
					return Collections.emptySet();
				}
				return CellWorldAction.actions();
			}
		};
		return af;
	}

	/**
	 * Figure 17.1 (b) Illustration of the transition model of the environment:
	 * the 'intended' outcome occurs with probability 0.8, but with probability
	 * 0.2 the agent moves at right angles to the intended direction. A
	 * collision with a wall results in no movement.
	 *
	 * @param cw
	 *            the cell world from figure 17.1.
	 * @return the transition probability function as described in figure 17.1.
	 */
	public static TransitionProbabilityFunction<Cell<Double>, CellWorldAction> createTransitionProbabilityFunctionForFigure17_1(
			final CellWorld<Double> cw) {
		TransitionProbabilityFunction<Cell<Double>, CellWorldAction> tf = new TransitionProbabilityFunction<Cell<Double>, CellWorldAction>() {
			// Fixed transition model: index 0 = intended outcome (0.8),
			// indices 1 and 2 = the two right-angle slips (0.1 each). This
			// array is index-aligned with the list built by
			// possibleOutcomes(), and must never be mutated.
			private final double[] distribution = new double[] { 0.8, 0.1, 0.1 };

			@Override
			public double probability(Cell<Double> sDelta, Cell<Double> s,
					CellWorldAction a) {
				double prob = 0;

				List<Cell<Double>> outcomes = possibleOutcomes(s, a);
				for (int i = 0; i < outcomes.size(); i++) {
					if (sDelta.equals(outcomes.get(i))) {
						// Note: You have to sum the matches to
						// sDelta as the different actions
						// could have the same effect (i.e.
						// staying in place due to there being
						// no adjacent cells), which increases
						// the probability of the transition for
						// that state.
						prob += distribution[i];
					}
				}

				return prob;
			}

			/**
			 * @param c
			 *            the cell the action is performed from.
			 * @param a
			 *            the intended action.
			 * @return the three cells the agent may end up in (intended
			 *         direction first, then the two right-angle slips),
			 *         index-aligned with {@code distribution}. cw.result() is
			 *         relied upon to return the current cell when movement is
			 *         blocked by a wall.
			 */
			private List<Cell<Double>> possibleOutcomes(Cell<Double> c,
					CellWorldAction a) {
				// There can be three possible outcomes for the planned action
				List<Cell<Double>> outcomes = new ArrayList<Cell<Double>>(3);
				outcomes.add(cw.result(c, a));
				outcomes.add(cw.result(c, a.getFirstRightAngledAction()));
				outcomes.add(cw.result(c, a.getSecondRightAngledAction()));
				return outcomes;
			}
		};

		return tf;
	}

	/**
	 * @return the reward function which takes the content of the cell as being
	 *         the reward value.
	 */
	public static RewardFunction<Cell<Double>> createRewardFunctionForFigure17_1() {
		RewardFunction<Cell<Double>> rf = new RewardFunction<Cell<Double>>() {
			@Override
			public double reward(Cell<Double> s) {
				// Auto-unboxes the cell's Double content; NOTE(review): this
				// will throw a NullPointerException if a cell's content was
				// never set - assumed populated by the CellWorld constructor.
				return s.getContent();
			}
		};
		return rf;
	}
}