/* * JABM - Java Agent-Based Modeling Toolkit * Copyright (C) 2013 Steve Phelps * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 3 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. */ package net.sourceforge.jabm.learning; import org.apache.log4j.Logger; import org.springframework.beans.factory.annotation.Required; import cern.jet.random.Uniform; import cern.jet.random.engine.RandomEngine; /** * <p> * An implementation of the epsilon-greedy action selection policy. * </p> * * <p> * See:<br> * Sutton, R. S., Barto, A. G., 1998. Reinforcement Learning: An Introduction. * MIT Press.<br> * </p> * * @author Steve Phelps * */ public class EpsilonGreedyActionSelector implements ActionSelector { /** * The parameter representing the probability of choosing a random action on * any given iteration. */ protected double epsilon; /** * |The pseudo-random number generator used to randomly select whether to * explore and to randomly select an action when the algorithm is exploring. */ protected RandomEngine prng; static Logger logger = Logger.getLogger(EpsilonGreedyActionSelector.class); public static final double DEFAULT_EPSILON = 0.01; public EpsilonGreedyActionSelector(double epsilon, RandomEngine prng) { super(); this.epsilon = epsilon; this.prng = prng; } public EpsilonGreedyActionSelector(RandomEngine prng) { this(DEFAULT_EPSILON, prng); } public EpsilonGreedyActionSelector() { this(DEFAULT_EPSILON, null); } @Override public int act(int state, MDPLearner qLearner) { if (prng.raw() <= epsilon) { // lastActionChosen = prng.choose(0, numActions-1); Uniform dist = new Uniform(prng); int randomAction = dist.nextIntFromTo(0, qLearner.getNumberOfActions() - 1); return randomAction; } else { return qLearner.bestAction(state); } } public double getEpsilon() { return epsilon; } public void setEpsilon(double epsilon) { this.epsilon = epsilon; if (logger.isDebugEnabled()) { logger.debug("epsilon = " + epsilon); } } public RandomEngine getPrng() { return prng; } @Required public void setPrng(RandomEngine prng) { this.prng = prng; } }