/* * JASA Java Auction Simulator API * Copyright (C) 2013 Steve Phelps * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. */ package net.sourceforge.jabm.learning; import java.io.Serializable; import net.sourceforge.jabm.util.Prototypeable; import net.sourceforge.jabm.util.Resetable; import org.springframework.beans.factory.annotation.Required; import cern.jet.random.engine.RandomEngine; /** * A memory-less version of the Q-Learning algorithm. * * This class implements StimuliResponseLearner instead of MDPLearner, and so * can be used in place of, e.g. a RothErevLearner. * * We use the standard MDP QLearner class, but fool it with this wrapper into * thinking that there is only one state. * * @author Steve Phelps * @version $Revision: 189 $ */ public class StatelessQLearner extends AbstractLearner implements StimuliResponseLearner, Resetable, Serializable, Prototypeable { QLearner qLearner; public StatelessQLearner(RandomEngine prng) { qLearner = new QLearner(prng); } public StatelessQLearner(int numActions, double learningRate, double discountRate, RandomEngine prng) { qLearner = new QLearner(1, numActions, learningRate, discountRate, prng); } public StatelessQLearner(int numActions, RandomEngine prng) { this(numActions,QLearner.DEFAULT_LEARNING_RATE, QLearner.DEFAULT_DISCOUNT_RATE, prng); } public int act() { return qLearner.act(); } public int bestAction() { return qLearner.bestAction(0); } public int worstAction() { return qLearner.worstAction(0); } public double getDiscountRate() { return qLearner.getDiscountRate(); } public int getLastActionChosen() { return qLearner.getLastActionChosen(); } public double getLearningRate() { return qLearner.getLearningRate(); } public int getNumActions() { return qLearner.getNumberOfActions(); } public int getPreviousState() { return qLearner.getPreviousState(); } public RandomEngine getPrng() { return qLearner.getPrng(); } public int getState() { return qLearner.getState(); } public void initialise() { qLearner.initialise(); } public double maxQ(int newState) { return qLearner.maxQ(newState); } public void setDiscountRate(double discountRate) { qLearner.setDiscountRate(discountRate); } public void setLearningRate(double learningRate) { qLearner.setLearningRate(learningRate); } public String toString() { return qLearner.toString(); } public void reward(double reward) { qLearner.newState(reward, 0); } public void reset() { qLearner.reset(); } public double getLearningDelta() { return qLearner.getLearningDelta(); } public int getNumberOfActions() { return qLearner.getNumberOfActions(); } public void setNumberOfActions(int n) { qLearner.setStatesAndActions(1, n); } public QLearner getqLearner() { return qLearner; } public void setqLearner(QLearner qLearner) { this.qLearner = qLearner; } public ActionSelector getActionSelector() { return qLearner.getActionSelector(); } @Required public void setActionSelector(ActionSelector actionSelector) { qLearner.setActionSelector(actionSelector); } public void dumpState(net.sourceforge.jabm.report.DataWriter out) { qLearner.dumpState(out); } public Object protoClone() { try { StatelessQLearner cloned = (StatelessQLearner) this.clone(); cloned.qLearner = (QLearner) this.qLearner.protoClone(); return cloned; } catch (CloneNotSupportedException e) { throw new Error(e); } } public void setInitialQValue(double initialQ) { qLearner.setInitialQValue(initialQ); } public double getInitialQValue() { return qLearner.getInitialQValue(); } }