MazeMarkovDecisionProcess.java example

Explorer
GATECH-master
package rl;

import java.io.BufferedReader;
import java.io.FileReader;

import dist.Distribution;

/**
 * A markov decision process representing a maze
 * @author Andrew Guillory gtg008g@mail.gatech.edu
 * @version 1.0
 */
public class MazeMarkovDecisionProcess implements MarkovDecisionProcess {
    /** The default failure probability */
    private static final double FAILURE_PROBABILITY = .01;
    
    /** The reward for solving the maze */
	private static final int REWARD = 100;
	
	/** The number of actions */
    public static final int ACTIONS = 4;    
    /** The move up action */
    public static final int MOVE_UP = 0;
    /** The move down action */
    public static final int MOVE_DOWN = 1;
    /** The move left action */
    public static final int MOVE_LEFT = 2;
    /** The move right action */
    public static final int MOVE_RIGHT = 3;
   
    /** The character representing an empty square */
    public static final char EMPTY = ' ';
    /** The character representing a closed square */
    public static final char OBSTACLE = '#';
    /** The character representing the agent */
    public static final char AGENT = 'o';
    /** The character representing the goal */
    public static final char GOAL = 'x';
    
    /**
     * The maze itself
     */
    private char[][] maze;
    /**
     * The probability of motion failing
     */
    private double motionFailureProbability;
    /**
     * The goal state
     */
    private int goal;
    /**
     * The initial state
     */
    private int initial;
    
    /**
     * Make a new maze markov decision process
     * @param maze the maze itself
     * @param xGoal the x goal
     * @param yGoal the y goal
     * @param xInitial the initial x
     * @param yInitial the initial y
     * @param motionFailureProbability the probability of motion failing
     */
    public MazeMarkovDecisionProcess(char[][] maze, int xGoal, int yGoal,
            int xInitial, int yInitial, double motionFailureProbability) {
        this.maze = maze;
        this.goal = stateFor(xGoal, yGoal);
        this.initial = stateFor(xInitial, yInitial);
        this.motionFailureProbability = motionFailureProbability;
    }
    /**
     * Determine if the state is blocked by an obstacle
     * @param state the state
     * @return true if it is
     */
    public boolean isObstacle(int state) {
        return isObstacle(xFor(state), yFor(state));
    }
    /**
     * Determine if the state is an obstacle
     * @param x the x location
     * @param y the y location
     * @return true if it is
     */
    public boolean isObstacle(int x, int y) {
        return maze[y][x] == OBSTACLE;
    }
    /**
     * Get the height of the maze
     * @return the height of the maze
     */
    public int getHeight() {
        return maze.length;
    }
    /**
     * Get the width of the maze
     * @return the width
     */
    public int getWidth() {
        return maze[0].length;
    }
    /**
     * Get the state for
     * @param x the x location
     * @param y the y location
     * @return the state number
     */
    public int stateFor(int x, int y) {
        return y + x * maze.length;
    }
    /**
     * Get the x coordinate for the given state
     * @param state the state
     * @return the x coordinate
     */
    public int xFor(int state) {
        return state / maze.length;
    }
    /**
     * Get the y coordinate for the given state
     * @param state the state
     * @return the y coordinate
     */
    public int yFor(int state) {
        return state % maze.length;
    }

    /**
     * @see rl.MarkovDecisionProcess#getStateCount()
     */
    public int getStateCount() {
        return maze.length * maze[0].length;
    }

    /**
     * @see rl.MarkovDecisionProcess#getActionCount()
     */
    public int getActionCount() {
        return ACTIONS;
    }

    /**
     * @see rl.MarkovDecisionProcess#reward(int, int)
     */
    public double reward(int state, int action) {
        if (state == goal) {
            return REWARD;
        } else {
            return transitionProbability(state, goal, action) * REWARD;
        }
    }

    /**
     * @see rl.MarkovDecisionProcess#transitionProbability(int, int, int)
     */
    public double transitionProbability(int i, int j, int a) {
        int startX = xFor(i), startY = yFor(i);
        int endX = xFor(j), endY = yFor(j);
        if (startX != endX && startY != endY) {
        	return 0;
        }
        int dx,dy;
        switch(a) {
            case MOVE_UP:
                dx = 0; dy = -1;
                break;
            case MOVE_DOWN:
                dx = 0; dy = 1;
                break;
            case MOVE_LEFT:
                dx = -1; dy = 0;
                break;
            case MOVE_RIGHT:
                dx = 1; dy = 0;
                break;
            default:
            	dx = 0; dy = 0;
                break;
        }
        if (endX == startX && endY == startY) {
            if (startX + dx >= getWidth() || startX + dx < 0 ||
                    startY + dy >= getHeight() || startY + dy < 0 ||
                    isObstacle(startX + dx, startY + dy)) {
                return 1;
            } else {
                return motionFailureProbability;
            }
        } else if (endX == startX + dx && endY == startY + dy) {
            if (startX + dx >= getWidth() || startX + dx < 0 ||
                    startY + dy >= getHeight() || startY + dy < 0 ||
                    isObstacle(startX + dx, startY + dy)) {
                return 0;
            } else {
                return 1 - motionFailureProbability;
            }
        } else {
            return 0;
        }
    }

    /**
     * @see rl.MarkovDecisionProcess#sampleState(int, int)
     */
    public int sampleState(int i, int a) {
        if (Distribution.random.nextDouble() < motionFailureProbability) {
        	return i;
        }
        int nextState = -1;
        switch(a) {
        	case MOVE_UP:
        		nextState = stateFor(xFor(i), yFor(i) - 1);
        		break;
            case MOVE_DOWN:
            	nextState = stateFor(xFor(i), yFor(i) + 1);
            	break;
            case MOVE_LEFT:
            	nextState = stateFor(xFor(i) - 1, yFor(i));
            	break;
            case MOVE_RIGHT:
            	nextState = stateFor(xFor(i) + 1, yFor(i));
            	break;
            default:
            	nextState = i;
        }
        if (maze[yFor(nextState)][xFor(nextState)] == OBSTACLE) {
            nextState = i;
        }
        return nextState;
    }

    /**
     * @see rl.MarkovDecisionProcess#sampleInitialState()
     */
    public int sampleInitialState() {
        return initial;
    }

    /**
     * @see rl.MarkovDecisionProcess#isTerminalState(int)
     */
    public boolean isTerminalState(int state) {
        return state == goal;
    }
    
    /**
     * Load a maze from a text file
     * @param fileName the file to read from
     * @throws an exception when there's an error reading
     * the file
     */
    public static MazeMarkovDecisionProcess load(String fileName) throws Exception {
        BufferedReader br = new BufferedReader(new FileReader(fileName));
        int height = 1;
        String line = br.readLine();
        int width = line.length();
        while((line = br.readLine()) != null) {
            height++;
        }
        br.close();
        char[][] maze = new char[height][width];
        br = new BufferedReader(new FileReader(fileName));
        int goalX = -1, goalY = -1;
        int initialX = -1, initialY = -1;
        for (int i = 0; i < maze.length; i++) {
            line = br.readLine();
            for (int j = 0; j < maze[i].length; j++) {
                char c = line.charAt(j);
                if (c == AGENT) {
                    initialX = j;
                    initialY = i;
                    maze[i][j] = EMPTY;
                } else if (c == GOAL) {
                    goalX = j;
                    goalY = i;
                    maze[i][j] = EMPTY;
                } else if (c == OBSTACLE) {
                    maze[i][j] = OBSTACLE;
                } else {
                    maze[i][j] = EMPTY;
                }
            }
        }
        br.close();
        return new MazeMarkovDecisionProcess(maze, goalX, goalY,
                initialX, initialY, FAILURE_PROBABILITY);
    }
    
    /**
     * Return a string representation
     * @return the string representation
     */
    public String toString() {
        String ret = "";
        int initialX = xFor(initial), initialY = yFor(initial);
        int goalX = xFor(goal), goalY = yFor(goal);
        for (int i = 0; i < maze.length; i++) {
            for (int j = 0; j < maze[i].length; j++) {
                if (i == initialY && j == initialX) {
                    ret += "o";
                } else if (i == goalY && j == goalX) {
                    ret += "x";
                } else {
                    ret += maze[i][j];
                }
            }
            ret += "\n";
        }
        return ret;
    }    
    

}