/*
 * Encog(tm) Core v3.4 - Java Version
 * http://www.heatonresearch.com/encog/
 * https://github.com/encog/encog-java-core
 * Copyright 2008-2016 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For more information on Heaton Research copyrights, licenses
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.ml.world.learning.mdp;

import org.encog.ml.world.Action;
import org.encog.ml.world.State;
import org.encog.ml.world.SuccessorState;
import org.encog.ml.world.World;

/**
 * Value iteration for a Markov decision process. Each call to
 * {@link #iteration()} performs one Bellman backup over every state in the
 * world, updating the stored policy value toward
 * V(s) = R(s) + discount * max over actions of the expected successor value.
 */
public class ValueIteration extends MarkovDecisionProcess {

	/**
	 * The discount factor (gamma) applied to the value of successor states.
	 */
	private double discountFactor;

	public ValueIteration(World theWorld, double theDiscountFactor) {
		super(theWorld);
		this.discountFactor = theDiscountFactor;
	}

	/**
	 * Perform a single Bellman backup for one state.
	 *
	 * @param state The state whose policy value should be updated.
	 */
	public void calculateValue(State state) {
		double result = Double.NEGATIVE_INFINITY;

		if (!getWorld().isGoalState(state)) {
			// Evaluate every action and keep the best expected discounted
			// value over that action's successor states.
			for (Action action : getWorld().getActions()) {
				double sum = 0;
				for (SuccessorState statePrime : this.getWorld()
						.getProbability()
						.determineSuccessorStates(state, action)) {
					sum += statePrime.getProbability()
							* statePrime.getState().getPolicyValue()[0];
				}
				sum *= this.discountFactor;
				result = Math.max(result, sum);
			}
			state.getPolicyValue()[0] = result + state.getReward();
		} else {
			// Goal (terminal) states simply take their immediate reward.
			state.getPolicyValue()[0] = state.getReward();
		}
	}

	/**
	 * Perform one sweep of value iteration over all states in the world.
	 */
	public void iteration() {
		for (State state : getWorld().getStates()) {
			calculateValue(state);
		}
	}
}