/**
* Copyright (C) 1999-2007, Anthony Harrison anh23@pitt.edu This library is free
* software; you can redistribute it and/or modify it under the terms of the GNU
* Lesser General Public License as published by the Free Software Foundation;
* either version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details. You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.jactr.core.module.procedural.six.learning;
/*
* default logging
*/
import org.jactr.core.model.IModel;
import org.jactr.core.production.IProduction;
import org.jactr.core.production.six.ISubsymbolicProduction6;
/**
 * Default {@link IExpectedUtilityEquation}: moves a production's expected
 * utility toward the received reward by a fraction given by the learning
 * module's parameter-learning rate, i.e.
 * <code>utility = previous + rate * (reward - previous)</code>.
 * <p>
 * The learning module is looked up lazily from the model handed to
 * {@link #computeExpectedUtility(IProduction, IModel, double)} and cached. The
 * cache is not synchronized.
 */
public class DefaultExpectedUtilityEquation implements
    IExpectedUtilityEquation
{
  /**
   * model the cached learning module was resolved from; used to detect when a
   * different model is passed in so a stale module is never reused
   */
  private IModel                           _cachedModel;

  /**
   * learning module resolved from {@link #_cachedModel}, or null if nothing
   * has been resolved yet
   */
  private DefaultProceduralLearningModule6 _proceduralLearningModule6;

  /**
   * no-arg constructor; the learning module is resolved on first use
   */
  public DefaultExpectedUtilityEquation()
  {
  }

  /**
   * Return the procedural learning module installed in the model, resolving
   * it again whenever the model differs from the one that was cached.
   * 
   * @param model
   *          model to fetch the learning module from
   * @return whatever {@link IModel#getModule(Class)} yields for
   *         {@link DefaultProceduralLearningModule6}
   */
  private DefaultProceduralLearningModule6 getLearningModule(IModel model)
  {
    DefaultProceduralLearningModule6 module = _proceduralLearningModule6;

    /*
     * the previous implementation kept the first module forever, so a second
     * model would silently have been evaluated with the first model's
     * learning parameters
     */
    if (module == null || _cachedModel != model)
    {
      module = (DefaultProceduralLearningModule6) model
          .getModule(DefaultProceduralLearningModule6.class);
      _proceduralLearningModule6 = module;
      _cachedModel = model;
    }

    return module;
  }

  /**
   * Compute the new expected utility of a production given a reward.
   * <p>
   * The starting point is the production's current expected utility, falling
   * back to its utility when the expected utility is NaN. The adjustment is
   * only applied when parameter learning is enabled and the reward is a
   * finite number; otherwise the starting value is returned unchanged.
   * 
   * @param production
   *          production whose subsymbolic values are read; its subsymbolic
   *          production must be an {@link ISubsymbolicProduction6}
   * @param model
   *          model that supplies the procedural learning module
   * @param reward
   *          reward being applied; NaN or infinite values leave the utility
   *          unchanged
   * @return the updated expected utility (nothing is written back to the
   *         production here)
   */
  public double computeExpectedUtility(IProduction production, IModel model,
      double reward)
  {
    DefaultProceduralLearningModule6 dplm = getLearningModule(model);
    ISubsymbolicProduction6 ssp = (ISubsymbolicProduction6) production
        .getSubsymbolicProduction();

    double previousUtility = ssp.getExpectedUtility();
    // no expected utility recorded yet, start from the base utility
    if (Double.isNaN(previousUtility)) previousUtility = ssp.getUtility();

    double partial = 0;
    if (dplm.isParameterLearningEnabled()
        && !(Double.isNaN(reward) || Double.isInfinite(reward)))
      partial = dplm.getParameterLearning() * (reward - previousUtility);

    double utility = previousUtility + partial;

    if (DefaultProceduralLearningModule6.LOGGER.isDebugEnabled())
      DefaultProceduralLearningModule6.LOGGER.debug(production + ".expectedUtility=" + utility + " previous="
          + previousUtility + " partial=" + partial + " reward=" + reward
          + " rate="
          + dplm.getParameterLearning());

    return utility;
  }
}