/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package sim.app.socialsystem; import org.ujmp.core.MatrixFactory; import org.ujmp.core.Matrix; import org.ujmp.core.calculation.Calculation.Ret; /** * * @author epokh */ public class QBrain { public final double learningRate=0.5; private Matrix Q; private Matrix lookupState; private int lookupActions[]; private int previousAction; private int previousState; private int previousReward; public int lastReward; public QBrain(int nstates,int nactions) { //create a sparse matrix Q containing the best pair of states-actions Q = MatrixFactory.dense(nstates, nactions); lookupState=MatrixFactory.dense(3,3); for(int col=0;col<nactions;col++) for(int row=0;row<nstates;row++) Q.setAsDouble(0.0, row,col); } public void setInitialAction(int orientation) { this.previousAction=orientation; } public void setInitialState(int error_reflex,int error_distal) { this.previousState=getStateIndex(error_reflex, error_distal); } //in the agent model there are 8 possible directions to be chosen //we code them simply from 0 to 7 public void initActions(int ndirections) { lookupActions=new int[ndirections]; lookupActions[0]=Agent.N; lookupActions[1]=Agent.NE; lookupActions[2]=Agent.E; lookupActions[3]=Agent.SE; lookupActions[4]=Agent.S; lookupActions[5]=Agent.SW; lookupActions[6]=Agent.W; lookupActions[7]=Agent.NW; } //there are 3x3 possible states we code them from 0 to 8 public void initStates() { int code=0; for(int col=0;col<2;col++) { for (int row=0;row<2;row++) { lookupState.setAsInt(code, row,col); code++; } } } public int getStateIndex(int error_reflex,int error_distal) { int col=error_reflex++; int row=error_distal++; return lookupState.getAsInt(row,col); } //return the best action according to the current state public int chooseAction(int error_reflex,int error_distal) { int present_state=getStateIndex(error_reflex, error_distal); int bestaction=findMaxAction(present_state,1); this.previousState=present_state; this.previousAction=bestaction; return bestaction; } //lookup for the future actions to get the best reward //we can choose the depth of prediction! public int findMaxAction(int state,int npredictions) { int maxQ=-1000; for(int k=0;k<Q.COLUMN;k++) { maxQ=(int) Math.max(Q.getAsDouble(state,k), maxQ); } int k=0; while(k<Q.COLUMN) { if(Q.getAsDouble(state,k)==maxQ) break; } return k; } //update the Q matrix using the reward received from the new state, //according to the last state and action chosen public void updateQvalues(int error_reflex,int error_distal) { double previousQ=Q.getAsDouble(previousState, previousAction); int newstate=getStateIndex(error_reflex, error_distal); int bestaction=findMaxAction(newstate,1); double nextQ=previousQ+learningRate*(lastReward+bestaction-previousQ); Q.setAsDouble(nextQ,previousState, previousAction); } }