/* Nathaniel Lim
* CS373 - Final Project
* May 18, 2010
* Implicit Imitation
*/
package project;
import java.util.Random;
import java.util.Scanner;
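/* Example console session (illustrative values; these prompts are the ones
 * printed by main below):
 *   Number of Agents: 10
 *   Number of Episodes: 50
 *   Give: 'MENTORTYPE, OBSERVERTYPE' (Types: King, Knight, TwoStep, Card):
 *   King, Knight
 *   Give 'q' for Q-learning
 *   q
 *   Give 'r' for Random Start States for Episodes
 *   r
 */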
public class ImitationSim {
    //When true, each episode starts from a uniformly random square instead of (0,0).
    private boolean randomStart = false;
    private ChessBoard board;
    private Piece[] mentors;
    private Piece[] observers;
    private PieceType mentorType;
    private PieceType observerType;
    private Random r = new Random();
    //Per-episode action cap; declared but not currently enforced in runEpisode.
    private static final int ACTION_LIMIT = 30;
    public ImitationSim(String boardFile, PieceType m, PieceType o){
        board = new ChessBoard(boardFile, m, o);
        mentorType = m;
        observerType = o;
    }

    public ChessBoard getBoard(){
        return board;
    }
    public enum PieceType{
        KING, KNIGHT, TWOSTEP, CARD;
    }
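    //Helpers factored out of main's duplicated if/else chains. They assume only
    //what main already relies on: the four Piece subclasses share the
    //constructor signature (ChessBoard, boolean isObserver, Piece mentor).
    private static PieceType parsePieceType(String s){
        if (s.equalsIgnoreCase("KNIGHT")) return PieceType.KNIGHT;
        if (s.equalsIgnoreCase("KING"))   return PieceType.KING;
        if (s.equalsIgnoreCase("CARD"))   return PieceType.CARD;
        //Unrecognized input falls through to TWOSTEP, matching the original chains.
        return PieceType.TWOSTEP;
    }

    private static String displayName(PieceType t){
        switch (t){
            case KING:   return "King";
            case KNIGHT: return "Knight";
            case CARD:   return "Cardinal";
            default:     return "TwoStep";
        }
    }

    private static Piece makePiece(PieceType t, ChessBoard b, boolean isObserver, Piece mentor){
        switch (t){
            case KING:   return new King(b, isObserver, mentor);
            case KNIGHT: return new Knight(b, isObserver, mentor);
            case CARD:   return new Cardinal(b, isObserver, mentor);
            default:     return new TwoStep(b, isObserver, mentor);
        }
    }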
    public static void main(String[] args){
        //Parse the simulation's console inputs. A single Scanner reads every
        //prompt; mixing readers on System.in can silently lose buffered input.
        Scanner in = new Scanner(System.in);
        int numAgents, numEpisodes;
        System.out.print("Number of Agents: ");
        numAgents = in.nextInt();
        System.out.print("Number of Episodes: ");
        numEpisodes = in.nextInt();
        in.nextLine(); //consume the newline left behind by nextInt()
        System.out.println("Give: 'MENTORTYPE, OBSERVERTYPE' (Types: King, Knight, TwoStep, Card):");
        //Split on a comma with optional whitespace, so "King,Knight" and "King, Knight" both parse.
        String[] types = in.nextLine().trim().split("\\s*,\\s*");
        PieceType m = parsePieceType(types[0]);
        System.out.println(displayName(m) + " Mentors");
        //A missing or unrecognized observer type falls through to TwoStep.
        PieceType o = parsePieceType(types.length > 1 ? types[1] : "");
        System.out.println(displayName(o) + " Observers");
        //Default board file; pass a path as the first command-line argument to override it.
        String boardFile = "/home/cs-students/njl2/cs373/Final/Imitation/src/project/board2.txt";
        if (args.length == 1){
            boardFile = args[0];
        }
        //Instantiate the simulation.
        ImitationSim sim = new ImitationSim(boardFile, m, o);
        //Set the learning rule.
        System.out.println("Give 'q' for Q-learning");
        String learningString = in.nextLine();
        boolean qlearning = false;
        if (learningString.trim().equalsIgnoreCase("q")){
            qlearning = true;
            System.out.println("Using Q-learning");
        } else {
            System.out.println("Using Augmented Bellman Backups");
        }
        sim.getBoard().setLearning(qlearning);
        //Set the starting-state scheme.
        System.out.println("Give 'r' for Random Start States for Episodes");
        String ss = in.nextLine();
        if (ss.trim().equalsIgnoreCase("r")){
            sim.randomStart = true;
        }
        //Start running the simulation.
        System.out.print("Starting Simulation ");
        if (sim.randomStart){
            System.out.println("with a random start state for each episode");
        } else {
            System.out.println("with (0,0) as the start state for each episode");
        }
        sim.mentors = new Piece[numAgents];
        sim.observers = new Piece[numAgents];
        System.out.println("World: \n" + sim.getBoard());
        double mentorReward = 0;
        double observerReward = 0;
        //Set up the mentor pieces; mentors learn on their own, with no mentor to watch.
        for (int i = 0; i < sim.mentors.length; i++){
            sim.mentors[i] = makePiece(sim.mentorType, sim.getBoard(), false, null);
        }
        //Run episodes on the mentors.
        System.out.print("Mentor Learning with actions: ");
        for (int a = 0; a < sim.mentors[0].actions.length; a++){
            System.out.print("(" + sim.mentors[0].actions[a].dx() + ", " + sim.mentors[0].actions[a].dy() + ") ");
        }
        System.out.println();
        for (int e = 0; e < numEpisodes; e++){
            for (int i = 0; i < sim.mentors.length; i++){
                mentorReward += sim.runEpisode(sim.mentors[i]);
            }
            System.out.println("Episode: " + (e+1) + " Average Reward: " + mentorReward / sim.mentors.length);
            mentorReward = 0;
        }
        //Set up the observers; observer i watches mentor i. Observers are
        //flagged as such so runEpisode resolves their moves against the
        //observer action table rather than the mentor's.
        for (int i = 0; i < sim.observers.length; i++){
            sim.observers[i] = makePiece(sim.observerType, sim.getBoard(), true, sim.mentors[i]);
        }
        //Run episodes on the observers.
        System.out.print("Observer Learning with actions: ");
        for (int a = 0; a < sim.observers[0].actions.length; a++){
            System.out.print("(" + sim.observers[0].actions[a].dx() + ", " + sim.observers[0].actions[a].dy() + ") ");
        }
        System.out.println();
        for (int e = 0; e < numEpisodes; e++){
            for (int i = 0; i < sim.observers.length; i++){
                observerReward += sim.runEpisode(sim.observers[i]);
            }
            System.out.println("Episode: " + (e+1) + " Average Reward: " + observerReward / sim.observers.length);
            observerReward = 0;
        }
System.out.println("End of Simulation");
}
    //Run one episode: place the piece at a start state and let it act under its
    //policy (via piece.getNextAction()) until it reaches a terminal state,
    //updating its transition tallies and Q-function after each step. Returns the
    //episode's total reward: the terminal reward minus accumulated action costs.
    private double runEpisode(Piece piece) {
        double reward = 0.0;
        Coordinate pos;
        PieceAction action;
        int actionsTaken = 0;
        int s, t;
        if (randomStart){
            pos = new Coordinate(r.nextInt(this.getBoard().sizeX()), r.nextInt(this.getBoard().sizeY()));
        } else {
            pos = new Coordinate(0, 0);
        }
        piece.setPosition(pos);
        while (!board.isTerminalState(board.getStateId(piece.getPosition().getX(), piece.getPosition().getY()))){
            //Every step incurs the board's uniform action cost.
            reward -= board.ACTION_COST;
            //Resolve the chosen action index against the right action table:
            //observers and mentors may move with different action sets.
            int bA = piece.getNextAction();
            if (piece.isObserver()){
                action = board.getObserverActions()[bA];
            } else {
                action = board.getMentorActions()[bA];
            }
            s = board.getStateId(piece.getPosition().getX(), piece.getPosition().getY());
            piece.setPosition(board.nextState(piece.getPosition(), action));
            t = board.getStateId(piece.getPosition().getX(), piece.getPosition().getY());
            //Record the observed transition (s, bA, t), then back it up.
            piece.updateTally(s, bA, t);
            piece.updateQFunction(s, bA, t);
            actionsTaken++;
        }
        //Terminal state reached: collect its reward and run the terminal-state
        //updates on the piece's value and Q-estimates.
        int terminal = board.getStateId(piece.getPosition().getX(), piece.getPosition().getY());
        reward += board.getReward(terminal);
        piece.updateVFunction(terminal, -1, 0);
        for (int a = 0; a < piece.actions.length; a++){
            piece.updateQFunction(terminal, a, -1);
        }
        return reward;
    }
}