package mdps;

public interface MDP {

    int numStates();

    int numActions();

    // The next-state distribution. If s' is the numerical index of a given next state,
    // then nextStateDistribution(s, a)[s'] is the probability of that state given s and a,
    // i.e. P(s' | s, a).
    double[] nextStateDistribution(int state, int action);

    // Returns the immediate reward for taking the given action in the given state.
    double getReward(int state, int action);

    // Should the current episode terminate when the agent reaches this state?
    boolean isTerminalState(int stateId);
}
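
// A minimal sketch of a concrete implementation, included for illustration only.
// It models a hypothetical two-state, two-action MDP in which action 0 tends to
// keep the agent in its current state and action 1 tends to move it to the other
// state; state 1 is terminal. The class name and the transition/reward numbers
// are assumptions, not part of the interface above, and the class would normally
// live in its own file.
class TwoStateMDP implements MDP {

    @Override
    public int numStates() {
        return 2;
    }

    @Override
    public int numActions() {
        return 2;
    }

    @Override
    public double[] nextStateDistribution(int state, int action) {
        // P(s' | s, a): action 0 stays in place with probability 0.9,
        // action 1 switches to the other state with probability 0.9.
        double[] dist = new double[numStates()];
        int other = 1 - state;
        if (action == 0) {
            dist[state] = 0.9;
            dist[other] = 0.1;
        } else {
            dist[other] = 0.9;
            dist[state] = 0.1;
        }
        return dist;
    }

    @Override
    public double getReward(int state, int action) {
        // Acting in state 0 yields reward 1.0; the terminal state yields nothing.
        return state == 0 ? 1.0 : 0.0;
    }

    @Override
    public boolean isTerminalState(int stateId) {
        return stateId == 1;
    }
}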