package mdps;

public interface MDP {

    int numStates();

    int numActions();

    // The next-state distribution. If s' is the numerical index of a given next state,
    // then nextStateDistribution(s, a)[s'] is the probability of that state given s and a,
    // i.e. P(s' | s, a).
    double[] nextStateDistribution(int state, int action);

    // Returns the immediate reward for taking the given action in the given state.
    double getReward(int state, int action);

    // Should the current episode terminate when the agent reaches this state?
    boolean isTerminalState(int stateId);
}
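
// A minimal sketch of a concrete implementation, included for illustration only.
// It models a hypothetical two-state, two-action MDP in which action 0 tends to
// keep the agent in its current state and action 1 tends to move it to the other
// state; state 1 is terminal. The class name and the transition/reward numbers
// are assumptions, not part of the interface above, and the class would normally
// live in its own file.
class TwoStateMDP implements MDP {

    @Override
    public int numStates() {
        return 2;
    }

    @Override
    public int numActions() {
        return 2;
    }

    @Override
    public double[] nextStateDistribution(int state, int action) {
        // P(s' | s, a): action 0 stays in place with probability 0.9,
        // action 1 switches to the other state with probability 0.9.
        double[] dist = new double[numStates()];
        int other = 1 - state;
        if (action == 0) {
            dist[state] = 0.9;
            dist[other] = 0.1;
        } else {
            dist[other] = 0.9;
            dist[state] = 0.1;
        }
        return dist;
    }

    @Override
    public double getReward(int state, int action) {
        // Acting in state 0 yields reward 1.0; the terminal state yields nothing.
        return state == 0 ? 1.0 : 0.0;
    }

    @Override
    public boolean isTerminalState(int stateId) {
        return stateId == 1;
    }
}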