package rl;

import shared.Instance;
import dist.Distribution;
import dist.UnivariateGaussian;
import dist.hmm.ModularHiddenMarkovModel;
import dist.hmm.SimpleStateDistribution;
import dist.hmm.SimpleStateDistributionTable;
import dist.hmm.StateDistribution;

/**
 * A Markov decision process with rewards unconditional on actions,
 * implemented using an input-output hidden Markov model.
 * @author Andrew Guillory gtg008g@mail.gatech.edu
 * @version 1.0
 */
public class SimpleMarkovDecisionProcess extends ModularHiddenMarkovModel implements MarkovDecisionProcess {

    /**
     * Set the reward values
     * @param rewardValues the reward values
     */
    public void setRewards(double[] rewardValues) {
        Distribution[] outputs = new Distribution[rewardValues.length];
        for (int i = 0; i < rewardValues.length; i++) {
            outputs[i] = new UnivariateGaussian(rewardValues[i], 1);
        }
        setOutputDistributions(outputs);
    }

    /**
     * Get the rewards
     * @return the rewards
     */
    public double[] getRewards() {
        double[] rewards = new double[getStateCount()];
        for (int i = 0; i < rewards.length; i++) {
            rewards[i] = ((UnivariateGaussian) getOutputDistributions()[i]).getMean();
        }
        return rewards;
    }

    /**
     * Get the reward for a state; the action is ignored since
     * rewards are unconditional on actions.
     * @param state the state
     * @param action the action (unused)
     * @return the reward
     */
    public double reward(int state, int action) {
        return ((UnivariateGaussian) getOutputDistributions()[state]).getMean();
    }

    /**
     * Set the transition matrices
     * @param matrices the matrices, indexed [state][action][next state]
     */
    public void setTransitionMatrices(double[][][] matrices) {
        StateDistribution[] transitions = new StateDistribution[matrices.length];
        for (int i = 0; i < matrices.length; i++) {
            transitions[i] = new SimpleStateDistributionTable(matrices[i]);
        }
        setTransitionDistributions(transitions);
    }

    /**
     * Get the transition matrices
     * @return the transition matrices
     */
    public double[][][] getTransitionMatrices() {
        double[][][] matrices = new double[getStateCount()][][];
        for (int i = 0; i < matrices.length; i++) {
            matrices[i] = ((SimpleStateDistributionTable) getTransitionDistributions()[i]).getProbabilityMatrix();
        }
        return matrices;
    }

    /**
     * Get the probability of transitioning from state i to state j
     * under action a
     * @param i the first state
     * @param j the second state
     * @param a the action
     * @return the probability
     */
    public double transitionProbability(int i, int j, int a) {
        return ((SimpleStateDistributionTable) getTransitionDistributions()[i])
            .getProbabilityMatrix()[a][j];
    }

    /**
     * Sample a next state given the current state and action
     * @param i the current state
     * @param a the action
     * @return the next state
     */
    public int sampleState(int i, int a) {
        return getTransitionDistributions()[i].generateRandomState(new Instance(a));
    }

    /**
     * Get the action count
     * @return the action count
     */
    public int getActionCount() {
        return ((SimpleStateDistributionTable) getTransitionDistributions()[0]).getInputRange();
    }

    /**
     * Set the initial state
     * @param i the new initial state
     */
    public void setInitialState(int i) {
        double[] p = new double[getStateCount()];
        p[i] = 1;
        setInitialStateDistribution(new SimpleStateDistribution(p));
    }

    /**
     * @see rl.MarkovDecisionProcess#sampleInitialState()
     */
    public int sampleInitialState() {
        return sampleInitialState(null);
    }

    /**
     * @see rl.MarkovDecisionProcess#isTerminalState(int)
     */
    public boolean isTerminalState(int state) {
        return false;
    }
}
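
/*
 * Illustrative usage sketch (not part of the original class). It shows how the
 * setters above could be combined to build a small two-state, two-action MDP.
 * It assumes ModularHiddenMarkovModel exposes a no-argument constructor and
 * that the state count is inferred from the distributions set below; verify
 * against the actual superclass before relying on it.
 *
 *   SimpleMarkovDecisionProcess mdp = new SimpleMarkovDecisionProcess();
 *   // transition matrices are indexed [state][action][next state],
 *   // matching transitionProbability(i, j, a) above
 *   mdp.setTransitionMatrices(new double[][][] {
 *       { {0.9, 0.1}, {0.1, 0.9} },   // transitions out of state 0
 *       { {0.5, 0.5}, {0.0, 1.0} }    // transitions out of state 1
 *   });
 *   mdp.setRewards(new double[] {0, 1}); // reward depends only on the state
 *   mdp.setInitialState(0);
 *   int next = mdp.sampleState(0, 1);    // sample a successor of state 0 under action 1
 *   double r  = mdp.reward(next, 0);     // the action argument is ignored
 */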