// QBrain.java example
//
// Explorer
// JavaMultiAgentSocial-master
// - libraries
//   - Blahut-Arimoto
//     - IT.java
// - mason
//   - ec
//     - util
//       - MersenneTwisterFast.java
//   - sim
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package sim.app.socialsystem;
import org.ujmp.core.MatrixFactory;
import org.ujmp.core.Matrix;
import org.ujmp.core.calculation.Calculation.Ret;
/**
 * Tabular Q-learning helper: keeps a (states x actions) matrix of Q values,
 * maps a pair of error signals to a state index, picks the greedy action for
 * a state and updates the Q value of the previously taken state/action pair.
 *
 * @author epokh
 */
public class QBrain {
    /** Step size used when moving the stored Q value towards the new estimate. */
    public final double learningRate=0.5;

    /** Number of distinct values per error signal; the state table is 3x3. */
    private static final int STATE_GRID_SIZE=3;

    /** Q values, indexed as (state, action). */
    private Matrix Q;

    /** Maps (row, col) of the shifted error signals to a state code 0..8. */
    private Matrix lookupState;
    /** Maps an action index to the corresponding Agent direction constant. */
    private int lookupActions[];
    private int previousAction;
    private int previousState;
    /** Reward used by {@link #updateQvalues(int, int)}; set by the caller. */
    public int lastReward;

    /**
     * Creates a brain with a zero-initialised Q table and a filled state
     * lookup table.
     *
     * @param nstates  number of rows of the Q table
     * @param nactions number of columns of the Q table
     */
    public QBrain(int nstates,int nactions)
    {
        //create a dense matrix Q containing the value of every state-action pair
        Q = MatrixFactory.dense(nstates, nactions);
        lookupState=MatrixFactory.dense(STATE_GRID_SIZE,STATE_GRID_SIZE);
        for(int col=0;col<nactions;col++)
            for(int row=0;row<nstates;row++)
                Q.setAsDouble(0.0, row,col);
        //fill the state codes right away so getStateIndex works even when
        //the caller never invokes initStates()
        fillStateLookup();
    }

    /**
     * Records the action considered to have been taken before the first
     * update.
     *
     * @param orientation index of the initial action
     */
    public void setInitialAction(int orientation)
    {
        this.previousAction=orientation;
    }

    /**
     * Records the state the agent starts in.
     *
     * @param error_reflex reflex error signal
     * @param error_distal distal error signal
     */
    public void setInitialState(int error_reflex,int error_distal)
    {
        this.previousState=getStateIndex(error_reflex, error_distal);
    }

    //in the agent model there are 8 possible directions to be chosen
    //we code them simply from 0 to 7
    /**
     * Builds the action lookup table.
     *
     * @param ndirections size of the lookup table; only the first
     *                    min(ndirections, 8) entries receive a direction,
     *                    any further entries stay 0
     */
    public void initActions(int ndirections)
    {
        int[] directions={Agent.N,Agent.NE,Agent.E,Agent.SE,
                          Agent.S,Agent.SW,Agent.W,Agent.NW};
        lookupActions=new int[ndirections];
        //copy only what fits: a table shorter than 8 must not overflow
        int ncopied=Math.min(ndirections, directions.length);
        System.arraycopy(directions, 0, lookupActions, 0, ncopied);
    }

    //there are 3x3 possible states we code them from 0 to 8
    /**
     * (Re)fills the state lookup table with the codes 0..8, column by column.
     */
    public void initStates()
    {
        fillStateLookup();
    }

    /** Writes the codes 0..8 into the 3x3 lookup table in column-major order. */
    private void fillStateLookup()
    {
        int code=0;
        for(int col=0;col<STATE_GRID_SIZE;col++)
        {
            for(int row=0;row<STATE_GRID_SIZE;row++)
            {
                lookupState.setAsInt(code, row,col);
                code++;
            }
        }
    }

    /**
     * Translates a pair of error signals into a state code.
     *
     * NOTE(review): the +1 shift assumes each error signal is one of
     * -1, 0, 1 so that it maps onto the table indices 0..2 - confirm
     * against the caller.
     *
     * @param error_reflex reflex error signal, selects the column
     * @param error_distal distal error signal, selects the row
     * @return the state code stored in the lookup table
     */
    public int getStateIndex(int error_reflex,int error_distal)
    {
        int col=error_reflex+1;
        int row=error_distal+1;
        return lookupState.getAsInt(row,col);
    }

    //return the best action according to the current state
    /**
     * Picks the greedy action for the state described by the two error
     * signals and remembers state and action for the next Q update.
     *
     * @param error_reflex reflex error signal
     * @param error_distal distal error signal
     * @return index of the action with the highest Q value in that state
     */
    public int chooseAction(int error_reflex,int error_distal)
    {
        int present_state=getStateIndex(error_reflex, error_distal);
        int bestaction=findMaxAction(present_state,1);
        this.previousState=present_state;
        this.previousAction=bestaction;
        return bestaction;
    }

    //lookup for the future actions to get the best reward
    /**
     * Returns the index of the action with the largest Q value in the given
     * state; ties are resolved in favour of the lowest index.
     *
     * @param state        row of the Q table to scan
     * @param npredictions prediction depth; currently unused
     * @return index of the best action, or 0 when the table has no columns
     */
    public int findMaxAction(int state,int npredictions)
    {
        //Q.COLUMN is only a dimension constant; the real number of actions
        //is the column count of the matrix
        long nactions=Q.getColumnCount();
        int bestaction=0;
        double maxQ=Double.NEGATIVE_INFINITY;
        for(int k=0;k<nactions;k++)
        {
            double candidate=Q.getAsDouble(state,k);
            //strict comparison keeps the first maximum on ties
            if(candidate>maxQ)
            {
                maxQ=candidate;
                bestaction=k;
            }
        }
        return bestaction;
    }

    //update the Q matrix using the reward received from the new state,
    //according to the last state and action chosen
    /**
     * Moves the Q value of the previous state/action pair towards
     * lastReward plus the best Q value reachable from the new state
     * (no discount factor is applied).
     *
     * @param error_reflex reflex error signal of the new state
     * @param error_distal distal error signal of the new state
     */
    public void updateQvalues(int error_reflex,int error_distal)
    {
        double previousQ=Q.getAsDouble(previousState, previousAction);
        int newstate=getStateIndex(error_reflex, error_distal);
        int bestaction=findMaxAction(newstate,1);
        //the target uses the VALUE of the best next action, not its index
        double bestNextQ=Q.getAsDouble(newstate, bestaction);
        double nextQ=previousQ+learningRate*(lastReward+bestNextQ-previousQ);
        Q.setAsDouble(nextQ,previousState, previousAction);
    }

}