/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package hh.heuristicselectors;
import hh.creditassigment.Credit;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import org.moeaframework.core.Variation;
/**
 * Implements the fitness-rate-rank-based multi-armed bandit (FRRMAB).
 * <p>
 * The most recent credits are kept in a fixed-size sliding window. After every
 * update the credits in the window are summed per operator, the operators are
 * ranked by that sum (largest first), each sum is discounted by
 * {@code d^rank}, and the discounted values are normalised by their total. The
 * normalised values are stored as the operator qualities.
 *
 * @author SEAK2
 */
public class FRRMAB extends AbstractMAB {

    /**
     * Sliding window of FIR values; the oldest entry is at the head.
     */
    private final LinkedList<FIR> window;

    /**
     * Maximum number of entries kept in the sliding window
     */
    private final int windowSize;

    /**
     * Rank decay value; it is raised to the power of an operator's rank
     */
    private final double d;

    /**
     * Orders operator/reward pairs by descending reward
     */
    private final OperatorRewardComparator comp;

    /**
     * Creates the selector with an empty sliding window.
     *
     * @param operators the operators to select from (forwarded to the
     * superclass)
     * @param c forwarded unchanged to the superclass constructor
     * @param windowSize maximum number of credits kept in the sliding window
     * @param d rank decay value used when computing the fitness rate rank
     */
    public FRRMAB(Collection<Variation> operators, double c, int windowSize, double d) {
        super(operators, c);
        this.window = new LinkedList<>();
        this.windowSize = windowSize;
        this.d = d;
        this.comp = new OperatorRewardComparator();
    }

    /**
     * Inserts the reward into the FIR sliding window, evicts the oldest entry
     * if the window has grown beyond its size, and then recomputes the quality
     * of every operator from the contents of the window.
     *
     * @param reward the credit earned by the heuristic
     * @param heuristic the operator that earned the credit
     */
    @Override
    public void update(Credit reward, Variation heuristic) {
        //update the sliding window
        window.addLast(new FIR(heuristic, reward));
        if (window.size() > windowSize) {
            window.removeFirst();
        }

        //update qualities; computeFRR returns a value for every operator
        HashMap<Variation, Double> rewardSums = computeRewardSum();
        HashMap<Variation, Double> frr = computeFRR(rewardSums);
        for (Variation op : operators) {
            qualities.put(op, frr.get(op));
        }
    }

    /**
     * Sums the credit values currently in the sliding window for each
     * operator. Operators without an entry in the window get a sum of 0.
     *
     * @return the sum of the windowed credit values of each operator
     */
    private HashMap<Variation, Double> computeRewardSum() {
        HashMap<Variation, Double> out = new HashMap<>();
        for (Variation op : operators) {
            out.put(op, 0.0);
        }
        //sum rewards
        //NOTE(review): assumes every operator in the window is also in
        //operators; otherwise out.get(op) is null and unboxing fails
        for (FIR fir : window) {
            Variation op = fir.getOperator();
            out.put(op, out.get(op) + fir.getValue().getValue());
        }
        return out;
    }

    /**
     * Computes and returns the fitness rate rank FRR of each operator. This
     * method does not modify the stored qualities; the caller is responsible
     * for storing the returned values.
     *
     * @param rewardSums the summed reward of each operator
     * @return FRR values of each operator. If the decayed rewards sum to zero
     * (for example when every reward is zero) the FRR of every operator is 0
     * rather than NaN.
     */
    private HashMap<Variation, Double> computeFRR(HashMap<Variation, Double> rewardSums) {
        //find rank of each operator
        ArrayList<OperatorRewardPair> ranked = new ArrayList<>(operators.size());
        for (Variation op : operators) {
            ranked.add(new OperatorRewardPair(op, rewardSums.get(op)));
        }
        //sort in descending order so that index 0 holds the best operator
        Collections.sort(ranked, comp);

        //compute decay values; ranks are 1-based, hence i + 1
        HashMap<Variation, Double> decay = new HashMap<>();
        double sumDecay = 0;
        for (int i = 0; i < ranked.size(); i++) {
            OperatorRewardPair orp = ranked.get(i);
            double decayValue = Math.pow(d, i + 1) * orp.getReward();
            decay.put(orp.getOperator(), decayValue);
            sumDecay += decayValue;
        }

        //compute FRR
        HashMap<Variation, Double> out = new HashMap<>();
        for (Variation op : operators) {
            //guard against 0/0, which would store NaN as a quality
            double frr = (sumDecay == 0.0) ? 0.0 : decay.get(op) / sumDecay;
            out.put(op, frr);
        }
        return out;
    }

    /**
     * Data structure to store fitness improvement rate value and the operator
     * responsible for it
     */
    private static class FIR {

        private final Variation operator;
        private final Credit value;

        public FIR(Variation operator, Credit value) {
            this.operator = operator;
            this.value = value;
        }

        /**
         * Returns the operator responsible for the FIR
         *
         * @return the operator responsible for the FIR
         */
        public Variation getOperator() {
            return operator;
        }

        /**
         * Returns the value of the FIR
         *
         * @return the credit stored for the FIR
         */
        public Credit getValue() {
            return value;
        }
    }

    /**
     * An ordered pair of the operator and its summed reward value
     */
    private static class OperatorRewardPair {

        private final Variation operator;
        private final double reward;

        public OperatorRewardPair(Variation operator, double reward) {
            this.operator = operator;
            this.reward = reward;
        }

        public Variation getOperator() {
            return operator;
        }

        public double getReward() {
            return reward;
        }
    }

    /**
     * Compares operator reward pairs based on the reward value. Will sort in
     * descending order.
     */
    private static class OperatorRewardComparator implements Comparator<OperatorRewardPair> {

        /**
         * @return the value 0 if t1 has the same reward value as t2; a value
         * greater than 0 if t1 has a reward value less than that of t2; and a
         * value less than 0 if t1 has a reward value greater than that of t2.
         */
        @Override
        public int compare(OperatorRewardPair t1, OperatorRewardPair t2) {
            //arguments are swapped to obtain descending order
            return Double.compare(t2.getReward(), t1.getReward());
        }
    }

    @Override
    public String toString() {
        return "FRRMAB";
    }
}