package org.arbeitspferde.groningen.scorer;
import com.google.common.annotations.VisibleForTesting;
import com.google.inject.Inject;
import org.arbeitspferde.groningen.common.EvaluatedSubject;
import org.arbeitspferde.groningen.utility.Clock;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
* HistoricalBestPerformerScorer that uses the generation number as a major weight in scoring.
*
* The individual's score for this iteration will be weighted (multiplied) by the iteration
* number.
*
* The best performer scoring will combine individuals both within an experiment if they are
* duplicated AND will combine scores across iterations. Namely, within the same generation
* individuals with the same command line (args and arg values) will be averaged.
*
* Across iterations, the individual's previous score will be combined with to its score in this
* iteration - the individual will be represented by a single @{link EvaluatedSubject} with the
* current iteration number and a score equal to the combination of the previous plus current
* scores.
*/
public class GenerationNumberWeightedBestPerformerScorer implements HistoricalBestPerformerScorer {
/**
* To create new instances of {@link EvaluatedSubject EvaluatedSubjects} when merging existing
* instance in the store.
*/
private final Clock clock;
/** Stores the all-time unique and merged evaluated subjects with greatest to least ordering. */
@VisibleForTesting
final
List<EvaluatedSubject> alltimeEvaluatedSubjects = new ArrayList<>();
@Inject
public GenerationNumberWeightedBestPerformerScorer(Clock clock) {
this.clock = clock;
}
/** @see HistoricalBestPerformerScorer#getBestPerformers() */
@Override
public List<EvaluatedSubject> getBestPerformers() {
synchronized (alltimeEvaluatedSubjects) {
return new ArrayList<>(alltimeEvaluatedSubjects);
}
}
/** @see HistoricalBestPerformerScorer#getBestPerformers(int) */
@Override
public List<EvaluatedSubject> getBestPerformers(int maxEntries) {
synchronized (alltimeEvaluatedSubjects) {
return new ArrayList<>(alltimeEvaluatedSubjects.subList(0, maxEntries));
}
}
/** @see HistoricalBestPerformerScorer#addGeneration(List) */
@Override
public List<EvaluatedSubject> addGeneration(List<EvaluatedSubject> newGeneration) {
/*
* detect and remove duplicates in the list of individuals in this iteration.
* duplicate the array so that we don't have to bother with locking as we process
* the generation.
*/
Map<String, EvaluatedSubject> cleanedLastIterationList =
cleanRecentRun(new ArrayList<>(newGeneration));
// Build the return value - the cleaned generation. Callee will own this copy.
List<EvaluatedSubject> cleanedGenerationList =
new ArrayList<>(cleanedLastIterationList.values());
Collections.sort(cleanedGenerationList, Collections.reverseOrder());
/* Merge, detect and remove duplicates in the alltime list */
synchronized (alltimeEvaluatedSubjects) {
mergeWeightedSumFitness(alltimeEvaluatedSubjects, cleanedLastIterationList);
// TODO(team): consider some type of pruning to this tree.
Collections.sort(alltimeEvaluatedSubjects, Collections.reverseOrder());
}
return cleanedGenerationList;
}
/**
* Merges the unique items of the current run together. It merges duplicates
* by taking their average score. For example, three instances of the same
* subject scoring 21, 23 and 29 on the most recent run, will have value:
* (21 + 23 + 29) / 3.
*
* @param targetList the {@link List} of the iteration's
* {@link EvaluatedSubject EvaluatedSubjects} to be cleaned. Will not modify the List.
* @returns a {@link Map} of unique EvaluatedSubjects keyed by their command lines.
*/
@VisibleForTesting
Map<String, EvaluatedSubject> cleanRecentRun(List<EvaluatedSubject> targetList) {
Map<String, List<EvaluatedSubject>> uniqueSubjects = detectDuplicates(targetList);
Map<String, EvaluatedSubject> cleanedSubjectMap = new HashMap<>();
for (Entry<String, List<EvaluatedSubject>> duplicateEntry : uniqueSubjects.entrySet()) {
String commandLine = duplicateEntry.getKey();
List<EvaluatedSubject> duplicates = duplicateEntry.getValue();
if (duplicates.size() > 1) {
double fitness = 0.0;
for (EvaluatedSubject duplicate : duplicates) {
fitness += duplicate.getFitness();
}
fitness /= duplicates.size();
EvaluatedSubject firstDup = duplicates.get(0);
long experimentId = firstDup.getExperimentId();
cleanedSubjectMap.put(commandLine, new EvaluatedSubject(clock, firstDup.getBridge(),
fitness, experimentId));
} else { // if just one subject
cleanedSubjectMap.put(commandLine, duplicates.get(0));
}
}
return cleanedSubjectMap;
}
/**
* Given a {@link List} of {@link EvaluatedSubject}, it detects duplicates
* and returns a HashMap of unique subjects. By unique, we mean ones which
* have different
* {@link org.arbeitspferde.groningen.experimentdb.CommandLine#toArgumentString()
* CommandLine#toArgumentString()}.
*
* @param targetList the {@link List} containing duplicates
* @return a map of lists. Keys are the CommandLine.toArgumentString(),
* pointing to a list of duplicated {@link EvaluatedSubject}.
*/
@VisibleForTesting
Map<String, List<EvaluatedSubject>> detectDuplicates(List<EvaluatedSubject> targetList) {
// Put all subjects in a HashMap
HashMap<String, List<EvaluatedSubject>> uniqueSubjects =
new HashMap<>();
for (EvaluatedSubject evaluatedSubject : targetList) {
// TODO(team): Fix Law of Demeter violations here.
String commandLine = evaluatedSubject.getBridge().getCommandLine().toArgumentString();
if (!uniqueSubjects.containsKey(commandLine)) {
uniqueSubjects.put(commandLine, new ArrayList<EvaluatedSubject>());
}
uniqueSubjects.get(commandLine).add(evaluatedSubject);
}
return uniqueSubjects;
}
/**
* Merges the unique items of the current run with the unique items of older
* runs. It removes any duplicates. It weighs each score by its generation
* number, and updates all scores. For example, a subject scoring 21 on run 1,
* didn't appear in run 2, and 19 on run 3, will have value: 21 * 1 + 19 * 3.
*
* The resultant array does not get sorted here.
*
* @param oldUniqueItemsList a {@link List} of all previous unique items.
* @param uniqueNewSubjects a {@link List} of current run unique items
*/
private void mergeWeightedSumFitness(List<EvaluatedSubject> oldUniqueItemsList,
Map<String, EvaluatedSubject> uniqueNewSubjects) {
for (EvaluatedSubject evaluatedSubject : oldUniqueItemsList) {
final String key = evaluatedSubject.getBridge().getCommandLine().toArgumentString();
// if old subject occurs in current experiment
if (uniqueNewSubjects.containsKey(key)) {
EvaluatedSubject uniqueNewEvaledSubject = uniqueNewSubjects.get(key);
double currentScore = uniqueNewSubjects.get(key).getFitness();
long experimentId = uniqueNewEvaledSubject.getExperimentId();
uniqueNewSubjects.remove(key);
// update the experiment the evaluated subject is associated with.
evaluatedSubject.setExperimentId(experimentId);
evaluatedSubject.setFitness(evaluatedSubject.getFitness() +
currentScore * experimentId);
}
}
// add remaining new entries
for (EvaluatedSubject newItem : uniqueNewSubjects.values()) {
long experimentId = newItem.getExperimentId();
oldUniqueItemsList.add(new EvaluatedSubject(clock, newItem.getBridge(),
newItem.getFitness() * experimentId, experimentId));
}
}
}