package rl;

import shared.Instance;
import dist.Distribution;
import dist.UnivariateGaussian;
import dist.hmm.ModularHiddenMarkovModel;
import dist.hmm.SimpleStateDistribution;
import dist.hmm.SimpleStateDistributionTable;
import dist.hmm.StateDistribution;

/**
 * A Markov decision process with rewards unconditional on actions,
 * implemented using an input-output hidden Markov model.
 * @author Andrew Guillory gtg008g@mail.gatech.edu
 * @version 1.0
 */
public class SimpleMarkovDecisionProcess extends ModularHiddenMarkovModel implements MarkovDecisionProcess {

    /**
     * Set the reward values
     * @param rewardValues the reward values
     */
    public void setRewards(double[] rewardValues) {
        Distribution[] outputs = new Distribution[rewardValues.length];
        for (int i = 0; i < rewardValues.length; i++) {
            outputs[i] = new UnivariateGaussian(rewardValues[i], 1);
        }
        setOutputDistributions(outputs);
    }

    /**
     * Get the rewards
     * @return the rewards
     */
    public double[] getRewards() {
        double[] rewards = new double[getStateCount()];
        for (int i = 0; i < rewards.length; i++) {
            rewards[i] = ((UnivariateGaussian) getOutputDistributions()[i]).getMean();
        }
        return rewards;
    }

    /**
     * Get the reward for a state; the action is ignored since
     * rewards are unconditional on actions.
     * @param state the state
     * @param action the action (unused)
     * @return the reward
     */
    public double reward(int state, int action) {
        return ((UnivariateGaussian) getOutputDistributions()[state]).getMean();
    }

    /**
     * Set the transition matrices
     * @param matrices the matrices, indexed [state][action][next state]
     */
    public void setTransitionMatrices(double[][][] matrices) {
        StateDistribution[] transitions = new StateDistribution[matrices.length];
        for (int i = 0; i < matrices.length; i++) {
            transitions[i] = new SimpleStateDistributionTable(matrices[i]);
        }
        setTransitionDistributions(transitions);
    }

    /**
     * Get the transition matrices
     * @return the transition matrices
     */
    public double[][][] getTransitionMatrices() {
        double[][][] matrices = new double[getStateCount()][][];
        for (int i = 0; i < matrices.length; i++) {
            matrices[i] = ((SimpleStateDistributionTable) getTransitionDistributions()[i]).getProbabilityMatrix();
        }
        return matrices;
    }

    /**
     * Get the probability of transitioning from state i to state j
     * under action a
     * @param i the first state
     * @param j the second state
     * @param a the action
     * @return the probability
     */
    public double transitionProbability(int i, int j, int a) {
        return ((SimpleStateDistributionTable) getTransitionDistributions()[i])
            .getProbabilityMatrix()[a][j];
    }

    /**
     * Sample a next state given the current state and action
     * @param i the current state
     * @param a the action
     * @return the next state
     */
    public int sampleState(int i, int a) {
        return getTransitionDistributions()[i].generateRandomState(new Instance(a));
    }

    /**
     * Get the action count
     * @return the action count
     */
    public int getActionCount() {
        return ((SimpleStateDistributionTable) getTransitionDistributions()[0]).getInputRange();
    }

    /**
     * Set the initial state
     * @param i the new initial state
     */
    public void setInitialState(int i) {
        double[] p = new double[getStateCount()];
        p[i] = 1;
        setInitialStateDistribution(new SimpleStateDistribution(p));
    }

    /**
     * @see rl.MarkovDecisionProcess#sampleInitialState()
     */
    public int sampleInitialState() {
        return sampleInitialState(null);
    }

    /**
     * @see rl.MarkovDecisionProcess#isTerminalState(int)
     */
    public boolean isTerminalState(int state) {
        return false;
    }
}
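
/*
 * Illustrative usage sketch (not part of the original class). It shows how the
 * setters above could be combined to build a small two-state, two-action MDP.
 * It assumes ModularHiddenMarkovModel exposes a no-argument constructor and
 * that the state count is inferred from the distributions set below; verify
 * against the actual superclass before relying on it.
 *
 *   SimpleMarkovDecisionProcess mdp = new SimpleMarkovDecisionProcess();
 *   // transition matrices are indexed [state][action][next state],
 *   // matching transitionProbability(i, j, a) above
 *   mdp.setTransitionMatrices(new double[][][] {
 *       { {0.9, 0.1}, {0.1, 0.9} },   // transitions out of state 0
 *       { {0.5, 0.5}, {0.0, 1.0} }    // transitions out of state 1
 *   });
 *   mdp.setRewards(new double[] {0, 1}); // reward depends only on the state
 *   mdp.setInitialState(0);
 *   int next = mdp.sampleState(0, 1);    // sample a successor of state 0 under action 1
 *   double r  = mdp.reward(next, 0);     // the action argument is ignored
 */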