/*
 * JASA Java Auction Simulator API
 * Copyright (C) 2013 Steve Phelps
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 */

package net.sourceforge.jabm.learning;

import java.io.Serializable;

import net.sourceforge.jabm.prng.DiscreteProbabilityDistribution;
import net.sourceforge.jabm.report.DataWriter;
import net.sourceforge.jabm.util.MathUtil;

import org.apache.log4j.Logger;

import cern.jet.random.engine.RandomEngine;

/**
 * <p>
 * A class implementing the Roth-Erev learning algorithm. This learning
 * algorithm is designed to mimic human-like behaviour in extensive-form games.
 * See:
 * </p>
 * <p>
 * A. E. Roth and I. Erev, "Learning in extensive form games: experimental data
 * and simple dynamic models in the intermediate term", Games and Economic
 * Behavior, Volume 8.
 * </p>
 *
 * @author Steve Phelps
 * @version $Revision: 106 $
 */
public class RothErevLearner extends AbstractLearner implements
    StimuliResponseLearner, Serializable {

	/**
	 * The number of choices available to make at each iteration.
	 */
	protected int k;

	/**
	 * The recency parameter.
	 */
	protected double r;

	/**
	 * The experimentation parameter.
	 */
	protected double e;

	/**
	 * The scaling parameter.
	 */
	protected double s1;

	/**
	 * Propensity for each possible action.
	 */
	protected double q[];

	/**
	 * Probabilities for each possible action.
	 */
	protected DiscreteProbabilityDistribution probabilities;

	/**
	 * The current iteration.
	 */
	protected int iteration;

	/**
	 * The last action chosen.
	 */
	protected int lastAction;

	/**
	 * The total amount of update to the probability vector on the last
	 * iteration.
	 */
	protected double deltaP;

	static final int DEFAULT_K = 100;

	static final double DEFAULT_R = 0.1;

	static final double DEFAULT_E = 0.2;

	static final double DEFAULT_S1 = 1.0;

	static Logger logger = Logger.getLogger(RothErevLearner.class);

	/**
	 * Construct a new learner.
	 *
	 * @param k
	 *          The number of possible actions.
	 * @param r
	 *          The recency parameter.
	 * @param e
	 *          The experimentation parameter.
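	 * @param s1
	 *          The scaling parameter.
	 * @param prng
	 *          The pseudo-random number generator used to sample actions.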
	 */
	public RothErevLearner(int k, double r, double e, double s1,
	    RandomEngine prng) {
		this.k = k;
		this.r = r;
		this.e = e;
		this.s1 = s1;
		validateParams();
		q = new double[k];
		probabilities = new DiscreteProbabilityDistribution(prng, k);
		resetPropensities();
		updateProbabilities();
		iteration = 0;
	}

	public RothErevLearner(RandomEngine prng) {
		this(DEFAULT_K, DEFAULT_R, DEFAULT_E, DEFAULT_S1, prng);
	}

	public RothErevLearner(int k, RandomEngine prng) {
		this(k, DEFAULT_R, DEFAULT_E, DEFAULT_S1, prng);
	}

	public RothErevLearner(int k, RandomEngine prng, double[] propensities) {
		this(k, prng);
		setPropensities(propensities);
	}

	public Object protoClone() {
		// TODO
		// RothErevLearner clonedLearner;
		// try {
		//   clonedLearner = (RothErevLearner) clone();
		//   clonedLearner.probabilities =
		//       (DiscreteProbabilityDistribution) probabilities.protoClone();
		// } catch (CloneNotSupportedException e) {
		//   throw new Error(e);
		// }
		return null;
	}

	protected void validateParams() {
		if (!(k > 0)) {
			throw new IllegalArgumentException("k must be positive");
		}
		if (!(r >= 0 && r <= 1)) {
			throw new IllegalArgumentException("r must range [0..1]");
		}
		if (!(e >= 0 && e <= 1)) {
			throw new IllegalArgumentException("e must range [0..1]");
		}
	}

	/**
	 * Generate the next action for this learner.
	 *
	 * @return An int in the range [0..k) representing the choice made by the
	 *         learner.
	 */
	public int act() {
		int action = choose();
		lastAction = action;
		iteration++;
		return action;
	}

	/**
	 * Reward the last action taken by the learner according to some payoff.
	 *
	 * @param reward
	 *          The payoff for the last action taken by the learner.
	 */
	public void reward(double reward) {
		assert reward >= 0;
		updatePropensities(lastAction, reward);
		updateProbabilities();
	}

	/**
	 * Choose a random action according to the probability distribution defined
	 * by the current propensities.
	 *
	 * @return An int in the range [0..k) sampled according to the current
	 *         action probabilities.
	 */
	public int choose() {
		return probabilities.generateRandomEvent();
	}

	/**
	 * Update the propensities for each possible action.
	 *
	 * @param action
	 *          The last action chosen by the learner.
	 * @param reward
	 *          The payoff received for that action.
	 */
	protected void updatePropensities(int action, double reward) {
		for (int i = 0; i < k; i++) {
			q[i] = (1 - r) * q[i] + experience(i, action, reward);
		}
	}

	/**
	 * Update the probabilities from the propensities.
	 */
	protected void updateProbabilities() {
		double sigmaQ = 0;
		for (int i = 0; i < k; i++) {
			sigmaQ += q[i];
		}
		deltaP = 0;
		for (int i = 0; i < k; i++) {
			double p1 = q[i] / sigmaQ;
			deltaP += MathUtil.diffSq(probabilities.getProbability(i), p1);
			probabilities.setProbability(i, p1);
		}
	}

	/**
	 * The experience function.
	 *
	 * @param i
	 *          The action under consideration.
	 * @param action
	 *          The last action chosen.
	 * @param reward
	 *          The payoff received for the last action.
	 * @return The share of the reward credited to action i.
	 */
	public double experience(int i, int action, double reward) {
		if (i == action) {
			return reward * (1 - e);
		} else {
			return reward * (e / (k - 1));
		}
	}

	/**
	 * Replace the current propensities with the supplied propensity array.
	 *
	 * @param q
	 *          The new propensity array to use.
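	 *          The array should contain one propensity per possible action,
	 *          i.e. it should have length k.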
	 */
	public void setPropensities(double q[]) {
		this.q = q;
		updateProbabilities();
	}

	public void resetPropensities() {
		double initialPropensity = s1 / k;
		for (int i = 0; i < k; i++) {
			q[i] = initialPropensity;
			probabilities.setProbability(i, 1.0 / k);
		}
	}

	public void setRecency(double r) {
		this.r = r;
		validateParams();
	}

	public void setExperimentation(double e) {
		this.e = e;
		validateParams();
	}

	public void setScaling(double s1) {
		this.s1 = s1;
	}

	private static int sign(double value) {
		return Double.compare(value, 0.0);
	}

	public void dumpState(DataWriter out) {
		for (int i = 0; i < k; i++) {
			out.newData(probabilities.getProbability(i));
		}
	}

	/**
	 * Get the total number of actions.
	 */
	public int getK() {
		return k;
	}

	/**
	 * Get the total number of actions.
	 */
	public int getNumberOfActions() {
		return getK();
	}

	public double getLearningDelta() {
		return deltaP;
	}

	/**
	 * Get the probability of the ith action.
	 */
	public double getProbability(int i) {
		return probabilities.getProbability(i);
	}

	/**
	 * Get the probability distribution corresponding to the current
	 * propensities.
	 */
	public DiscreteProbabilityDistribution getProbabilities() {
		return probabilities;
	}

	public double getE() {
		return e;
	}

	public int getIteration() {
		return iteration;
	}

	public int getLastAction() {
		return lastAction;
	}

	public double getR() {
		return r;
	}

	public double getS1() {
		return s1;
	}

	public String toString() {
		return "(" + getClass() + " k:" + k + " r:" + r + " e:" + e + " s1:" + s1
		    + " learningDelta:" + deltaP + ")";
	}

	@Override
	public int bestAction() {
		double max = Double.NEGATIVE_INFINITY;
		int result = -1;
		for (int i = 0; i < q.length; i++) {
			if (q[i] > max) {
				max = q[i];
				result = i;
			}
		}
		return result;
	}

	@Override
	public int worstAction() {
		double min = Double.POSITIVE_INFINITY;
		int result = -1;
		for (int i = 0; i < q.length; i++) {
			if (q[i] < min) {
				min = q[i];
				result = i;
			}
		}
		return result;
	}

}
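/*
 * A minimal usage sketch, not part of the original JASA/JABM API: the
 * RothErevLearnerExample class below is hypothetical and only illustrates the
 * act/reward cycle against a fixed, made-up payoff vector. It assumes that
 * COLT's MersenneTwister64 engine is available on the classpath; in a real
 * simulation the payoff would come from the market or game being modelled.
 */
class RothErevLearnerExample {

	public static void main(String[] args) {
		RandomEngine prng = new cern.jet.random.engine.MersenneTwister64();

		// Three possible actions, using the default recency, experimentation
		// and scaling parameters.
		RothErevLearner learner = new RothErevLearner(3, prng);

		// Hypothetical payoffs: action 1 is the most profitable.
		double[] payoffs = { 1.0, 5.0, 2.0 };

		for (int t = 0; t < 1000; t++) {
			int action = learner.act();      // sample an action from the mixed strategy
			learner.reward(payoffs[action]); // reinforce the chosen action
		}

		// After sufficient reinforcement the highest propensity should
		// correspond to the most profitable action.
		System.out.println("best action: " + learner.bestAction());
		System.out.println("P(best action) = "
		    + learner.getProbability(learner.bestAction()));
	}
}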