package mdps;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * A gridworld MDP that is read from a text file. Every non-wall grid cell is a state which an
 * agent can navigate using NORTH/SOUTH/EAST/WEST actions; wall cells are obstacles.
 *
 * The format for gridworld files is:
 *
 * line 1: x dimension (integer)
 * line 2: y dimension (integer)
 * Then an x X y matrix of values, either "W" (for walls) or a real value representing the
 * immediate reward in each accessible state.
 *
 * For example:
 *
 * 3
 * 5
 * 0 0 W 0 4
 * W 0 0 0 0
 * -2 0 0 W 0
 *
 * The file is split into tokens with a {@link StreamTokenizer}, so line breaks are not
 * significant. Any cell token that is not a number is treated as a wall.
 *
 * @author pippin
 *
 */
public class Gridworld implements MDP {

    /**
     * An immutable x/y coordinate in the gridworld.
     *
     * @author pippin
     *
     */
    public class Coordinate {
        private final int x;
        private final int y;

        /**
         * @param x index into the first (x) dimension of the grid
         * @param y index into the second (y) dimension of the grid
         */
        public Coordinate(int x, int y) {
            this.x = x;
            this.y = y;
        }

        /** @return the x component */
        public int getX() {
            return x;
        }

        /** @return the y component */
        public int getY() {
            return y;
        }
    }

    /**
     * How the noise probability mass is distributed. With BASIC the noise keeps the agent in its
     * current state; with SIDE it is split evenly between the two actions perpendicular to the
     * intended one.
     *
     * For this assignment, you'll be using the SIDE noise type.
     * @author pippin
     *
     */
    public enum NoiseType {
        BASIC, SIDE;
    }

    /**
     * East, west, north, south actions. These are consistent with each other, but may not
     * correspond with up = north, etc, in the GUI.
     * @author pippin
     *
     */
    public enum GridworldAction {
        EAST(1, 0) {
            @Override
            public GridworldAction getClockwiseAction() {
                return SOUTH;
            }

            @Override
            public GridworldAction getCounterClockwiseAction() {
                return NORTH;
            }
        },
        SOUTH(0, -1) {
            @Override
            public GridworldAction getClockwiseAction() {
                return WEST;
            }

            @Override
            public GridworldAction getCounterClockwiseAction() {
                return EAST;
            }
        },
        WEST(-1, 0) {
            @Override
            public GridworldAction getClockwiseAction() {
                return NORTH;
            }

            @Override
            public GridworldAction getCounterClockwiseAction() {
                return SOUTH;
            }
        },
        NORTH(0, 1) {
            @Override
            public GridworldAction getClockwiseAction() {
                return EAST;
            }

            @Override
            public GridworldAction getCounterClockwiseAction() {
                return WEST;
            }
        };

        // Offset applied to a coordinate when this action succeeds.
        private final int xChange;
        private final int yChange;

        private GridworldAction(int xChange, int yChange) {
            this.xChange = xChange;
            this.yChange = yChange;
        }

        /**
         * @return the numeric id of this action. It is the constant's ordinal, which is also its
         *         index in the action array used by {@code nextStateDistribution}.
         */
        public int getActionId() {
            return ordinal();
        }

        /** @return the action a quarter turn clockwise from this one */
        public abstract GridworldAction getClockwiseAction();

        /** @return the action a quarter turn counter-clockwise from this one */
        public abstract GridworldAction getCounterClockwiseAction();
    }

    // All actions, indexed by action id.
    private final GridworldAction[] actions = GridworldAction.values();
    // wallMatrix[x][y] is true when the cell is a wall.
    private boolean[][] wallMatrix;
    // rewardMatrix[x][y] is the immediate reward of the cell (0 for walls).
    private double[][] rewardMatrix;
    private int numStates;
    private final NoiseType noiseType = NoiseType.SIDE;
    // Probability that the intended move is replaced by a noise outcome.
    private final double noise = 0.2;
    // stateLabels[x][y] is the state id of a non-wall cell; wall cells keep the default 0.
    private int[][] stateLabels;
    // Inverse of stateLabels: the coordinate of each state id.
    private final ArrayList<Coordinate> stateList = new ArrayList<Coordinate>();
    private int xDim;
    private int yDim;

    /**
     * Reads in a gridworld from a file. See Gridworld class comments for file format.
     *
     * @param fileName path of the gridworld file
     * @throws IllegalArgumentException if the file cannot be read, if either dimension is
     *         missing, non-numeric or negative, or if the file ends before every cell of the
     *         grid has been read
     */
    public Gridworld(String fileName) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(fileName));
            StreamTokenizer token = new StreamTokenizer(reader);
            xDim = readDimension(token, "x", fileName);
            yDim = readDimension(token, "y", fileName);
            wallMatrix = new boolean[xDim][yDim];
            rewardMatrix = new double[xDim][yDim];
            for (int i = 0; i < xDim; i++) {
                for (int j = 0; j < yDim; j++) {
                    int type = token.nextToken();
                    if (type == StreamTokenizer.TT_EOF) {
                        // Without this check a truncated file would be padded with walls.
                        throw new IllegalArgumentException("Gridworld file " + fileName
                                + " ended before cell (" + i + ", " + j + ") of a "
                                + xDim + " x " + yDim + " grid");
                    }
                    if (type == StreamTokenizer.TT_NUMBER) {
                        rewardMatrix[i][j] = token.nval;
                    } else {
                        // Any non-numeric token ("W" by convention) marks a wall.
                        wallMatrix[i][j] = true;
                    }
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException. Fail here rather than hand back a grid whose
            // matrices were never (or only partially) filled in.
            throw new IllegalArgumentException("Could not read gridworld file: " + fileName, e);
        } finally {
            closeQuietly(reader);
        }
        initialize();
    }

    /**
     * Reads one grid dimension from the tokenizer.
     *
     * @param token tokenizer positioned just before the dimension
     * @param axis name of the dimension, used in error messages
     * @param fileName name of the file being read, used in error messages
     * @return the dimension, truncated to an int
     * @throws IOException if reading from the underlying stream fails
     * @throws IllegalArgumentException if the next token is not a non-negative number
     */
    private static int readDimension(StreamTokenizer token, String axis, String fileName)
            throws IOException {
        // nval is only meaningful when the token really is a number; otherwise it still holds
        // the value of the previous numeric token.
        if (token.nextToken() != StreamTokenizer.TT_NUMBER) {
            throw new IllegalArgumentException("Gridworld file " + fileName
                    + " does not specify a numeric " + axis + " dimension");
        }
        int dimension = (int) token.nval;
        if (dimension < 0) {
            throw new IllegalArgumentException("Gridworld file " + fileName
                    + " specifies a negative " + axis + " dimension: " + dimension);
        }
        return dimension;
    }

    /**
     * Closes the reader, ignoring any failure to do so.
     *
     * @param reader the reader to close, may be null
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done: either the grid has already been read completely or
            // a more informative exception is already propagating.
        }
    }

    /**
     * Assigns consecutive state ids to the non-wall cells, scanning x first and then y, and
     * records the coordinate of every state.
     */
    private void initialize() {
        stateLabels = new int[xDim][yDim];
        int count = 0;
        for (int i = 0; i < xDim; i++) {
            for (int j = 0; j < yDim; j++) {
                if (!wallMatrix[i][j]) {
                    stateLabels[i][j] = count;
                    stateList.add(new Coordinate(i, j));
                    count++;
                }
            }
        }
        numStates = count;
    }

    /*
     * the following methods implement the MDP interface.
     */

    /**
     * the number of actions available. All actions are available in all states.
     */
    public int numActions() {
        return actions.length;
    }

    /**
     * The number of states (non-wall locations).
     */
    public int numStates() {
        return numStates;
    }

    /**
     * The next state distribution for the given state and action. Actions are noisy, with errors
     * resulting in moves to the side (if the intended action is NORTH, noise may cause the agent
     * to move EAST or WEST instead). A move that would leave the grid or enter a wall leaves the
     * agent where it is, so its probability is added to the current state.
     *
     * If a location has a non-zero reward value, it is assumed to be a terminal state and the
     * transition vector consists entirely of zeros.
     *
     * This made it easier for me to implement ValueIteration, however, if you would prefer a
     * different approach you may comment out the terminal state handling lines below.
     *
     * @param stateId id of the current state
     * @param actionNum id of the intended action
     * @return an array of length {@code numStates()} holding the probability of reaching each
     *         state
     */
    public double[] nextStateDistribution(int stateId, int actionNum) {
        // A new double array is already filled with zeros.
        double[] transitionRow = new double[numStates()];
        Coordinate coords = getStateCoords(stateId);
        // if this is a terminal state, return immediately
        if (hasNonZeroReward(coords)) {
            return transitionRow;
        }
        GridworldAction intended = actions[actionNum];
        transitionRow[getNeighborState(coords, intended)] += 1.0 - noise;
        switch (noiseType) {
        case BASIC: {
            transitionRow[stateId] += noise;
            break;
        }
        case SIDE: {
            // Accumulate rather than assign: several outcomes may land on the same state when
            // moves are blocked.
            double sideNoise = noise / 2.0;
            transitionRow[getNeighborState(coords, intended.getClockwiseAction())] += sideNoise;
            transitionRow[getNeighborState(coords, intended.getCounterClockwiseAction())] += sideNoise;
            break;
        }
        }
        return transitionRow;
    }

    /**
     * Find the state index of the state in the indicated direction from the original coordinates.
     * If the action is invalid (goes off the grid or into a wall), the state at the original
     * coordinates is returned.
     *
     * @param coords the starting location
     * @param action the direction to move in
     * @return the id of the resulting state
     */
    private int getNeighborState(Coordinate coords, GridworldAction action) {
        int nextX = coords.getX() + action.xChange;
        int nextY = coords.getY() + action.yChange;
        // The bounds checks must come first so that wallMatrix is never indexed off the grid.
        if (nextX < 0 || nextY < 0 || nextX >= xDim || nextY >= yDim || wallMatrix[nextX][nextY]) {
            nextX = coords.getX();
            nextY = coords.getY();
        }
        return getStateId(nextX, nextY);
    }

    /**
     * Get the state id corresponding to a particular coordinate location. Wall cells have no
     * state of their own; for them this returns 0, which is also a valid state id, so callers
     * should consult the wall matrix first.
     *
     * @param x x coordinate inside the grid
     * @param y y coordinate inside the grid
     * @return the state id stored for that cell
     */
    protected int getStateId(int x, int y) {
        return stateLabels[x][y];
    }

    /**
     * Get the coordinates corresponding to a particular state label.
     *
     * @param stateId id of the state
     * @return the location of that state
     */
    public Coordinate getStateCoords(int stateId) {
        return stateList.get(stateId);
    }

    /**
     * @return the contents of the wall matrix, e.g. {@code [[false, true], [false, false]]}
     */
    @Override
    public String toString() {
        // toString() on the array itself would only yield its type and identity hash.
        return Arrays.deepToString(wallMatrix);
    }

    /**
     * Return a boolean matrix indicating the location of walls.
     *
     * @return the internal wall matrix itself (not a copy), indexed as [x][y]
     */
    public boolean[][] getWallMatrix() {
        return wallMatrix;
    }

    /**
     * Any state with a non-zero immediate reward value is considered to be a terminal
     * state in this MDP.
     *
     * @param stateId id of the state
     * @return true if the state is terminal
     */
    public boolean isTerminalState(int stateId) {
        return hasNonZeroReward(getStateCoords(stateId));
    }

    /**
     * The single definition of "terminal" shared by the transition model and
     * {@link #isTerminalState(int)}.
     *
     * @param coords location to test
     * @return true if the immediate reward at that location is not zero
     */
    private boolean hasNonZeroReward(Coordinate coords) {
        return rewardMatrix[coords.getX()][coords.getY()] != 0;
    }

    /**
     * Returns the immediate reward value specified by the input text file.
     *
     * @param state id of the state
     * @param action id of the action; the reward does not depend on it
     * @return the reward of the state's grid cell
     */
    public double getReward(int state, int action) {
        Coordinate coord = getStateCoords(state);
        return rewardMatrix[coord.getX()][coord.getY()];
    }
}