package rl; import java.io.BufferedReader; import java.io.FileReader; import dist.Distribution; /** * A markov decision process representing a maze * @author Andrew Guillory gtg008g@mail.gatech.edu * @version 1.0 */ public class MazeMarkovDecisionProcess implements MarkovDecisionProcess { /** The default failure probability */ private static final double FAILURE_PROBABILITY = .01; /** The reward for solving the maze */ private static final int REWARD = 100; /** The number of actions */ public static final int ACTIONS = 4; /** The move up action */ public static final int MOVE_UP = 0; /** The move down action */ public static final int MOVE_DOWN = 1; /** The move left action */ public static final int MOVE_LEFT = 2; /** The move right action */ public static final int MOVE_RIGHT = 3; /** The character representing an empty square */ public static final char EMPTY = ' '; /** The character representing a closed square */ public static final char OBSTACLE = '#'; /** The character representing the agent */ public static final char AGENT = 'o'; /** The character representing the goal */ public static final char GOAL = 'x'; /** * The maze itself */ private char[][] maze; /** * The probability of motion failing */ private double motionFailureProbability; /** * The goal state */ private int goal; /** * The initial state */ private int initial; /** * Make a new maze markov decision process * @param maze the maze itself * @param xGoal the x goal * @param yGoal the y goal * @param xInitial the initial x * @param yInitial the initial y * @param motionFailureProbability the probability of motion failing */ public MazeMarkovDecisionProcess(char[][] maze, int xGoal, int yGoal, int xInitial, int yInitial, double motionFailureProbability) { this.maze = maze; this.goal = stateFor(xGoal, yGoal); this.initial = stateFor(xInitial, yInitial); this.motionFailureProbability = motionFailureProbability; } /** * Determine if the state is blocked by an obstacle * @param state the state * @return true if it is */ public boolean isObstacle(int state) { return isObstacle(xFor(state), yFor(state)); } /** * Determine if the state is an obstacle * @param x the x location * @param y the y location * @return true if it is */ public boolean isObstacle(int x, int y) { return maze[y][x] == OBSTACLE; } /** * Get the height of the maze * @return the height of the maze */ public int getHeight() { return maze.length; } /** * Get the width of the maze * @return the width */ public int getWidth() { return maze[0].length; } /** * Get the state for * @param x the x location * @param y the y location * @return the state number */ public int stateFor(int x, int y) { return y + x * maze.length; } /** * Get the x coordinate for the given state * @param state the state * @return the x coordinate */ public int xFor(int state) { return state / maze.length; } /** * Get the y coordinate for the given state * @param state the state * @return the y coordinate */ public int yFor(int state) { return state % maze.length; } /** * @see rl.MarkovDecisionProcess#getStateCount() */ public int getStateCount() { return maze.length * maze[0].length; } /** * @see rl.MarkovDecisionProcess#getActionCount() */ public int getActionCount() { return ACTIONS; } /** * @see rl.MarkovDecisionProcess#reward(int, int) */ public double reward(int state, int action) { if (state == goal) { return REWARD; } else { return transitionProbability(state, goal, action) * REWARD; } } /** * @see rl.MarkovDecisionProcess#transitionProbability(int, int, int) */ public double transitionProbability(int i, int j, int a) { int startX = xFor(i), startY = yFor(i); int endX = xFor(j), endY = yFor(j); if (startX != endX && startY != endY) { return 0; } int dx,dy; switch(a) { case MOVE_UP: dx = 0; dy = -1; break; case MOVE_DOWN: dx = 0; dy = 1; break; case MOVE_LEFT: dx = -1; dy = 0; break; case MOVE_RIGHT: dx = 1; dy = 0; break; default: dx = 0; dy = 0; break; } if (endX == startX && endY == startY) { if (startX + dx >= getWidth() || startX + dx < 0 || startY + dy >= getHeight() || startY + dy < 0 || isObstacle(startX + dx, startY + dy)) { return 1; } else { return motionFailureProbability; } } else if (endX == startX + dx && endY == startY + dy) { if (startX + dx >= getWidth() || startX + dx < 0 || startY + dy >= getHeight() || startY + dy < 0 || isObstacle(startX + dx, startY + dy)) { return 0; } else { return 1 - motionFailureProbability; } } else { return 0; } } /** * @see rl.MarkovDecisionProcess#sampleState(int, int) */ public int sampleState(int i, int a) { if (Distribution.random.nextDouble() < motionFailureProbability) { return i; } int nextState = -1; switch(a) { case MOVE_UP: nextState = stateFor(xFor(i), yFor(i) - 1); break; case MOVE_DOWN: nextState = stateFor(xFor(i), yFor(i) + 1); break; case MOVE_LEFT: nextState = stateFor(xFor(i) - 1, yFor(i)); break; case MOVE_RIGHT: nextState = stateFor(xFor(i) + 1, yFor(i)); break; default: nextState = i; } if (maze[yFor(nextState)][xFor(nextState)] == OBSTACLE) { nextState = i; } return nextState; } /** * @see rl.MarkovDecisionProcess#sampleInitialState() */ public int sampleInitialState() { return initial; } /** * @see rl.MarkovDecisionProcess#isTerminalState(int) */ public boolean isTerminalState(int state) { return state == goal; } /** * Load a maze from a text file * @param fileName the file to read from * @throws an exception when there's an error reading * the file */ public static MazeMarkovDecisionProcess load(String fileName) throws Exception { BufferedReader br = new BufferedReader(new FileReader(fileName)); int height = 1; String line = br.readLine(); int width = line.length(); while((line = br.readLine()) != null) { height++; } br.close(); char[][] maze = new char[height][width]; br = new BufferedReader(new FileReader(fileName)); int goalX = -1, goalY = -1; int initialX = -1, initialY = -1; for (int i = 0; i < maze.length; i++) { line = br.readLine(); for (int j = 0; j < maze[i].length; j++) { char c = line.charAt(j); if (c == AGENT) { initialX = j; initialY = i; maze[i][j] = EMPTY; } else if (c == GOAL) { goalX = j; goalY = i; maze[i][j] = EMPTY; } else if (c == OBSTACLE) { maze[i][j] = OBSTACLE; } else { maze[i][j] = EMPTY; } } } br.close(); return new MazeMarkovDecisionProcess(maze, goalX, goalY, initialX, initialY, FAILURE_PROBABILITY); } /** * Return a string representation * @return the string representation */ public String toString() { String ret = ""; int initialX = xFor(initial), initialY = yFor(initial); int goalX = xFor(goal), goalY = yFor(goal); for (int i = 0; i < maze.length; i++) { for (int j = 0; j < maze[i].length; j++) { if (i == initialY && j == initialX) { ret += "o"; } else if (i == goalY && j == goalX) { ret += "x"; } else { ret += maze[i][j]; } } ret += "\n"; } return ret; } }