package com.datascience.scheduler;
import com.datascience.core.base.IData;
import com.datascience.core.base.LObject;
import com.datascience.core.base.Project;
import com.datascience.core.base.Worker;
import com.datascience.core.results.DatumResult;
import com.datascience.core.results.IResults;
import com.datascience.core.results.WorkerResult;
import java.util.Collection;
import java.util.Iterator;
import static com.google.common.base.Preconditions.checkArgument;
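/**
 * Holder for {@link ISchedulerForWorker} implementations, i.e. strategies that
 * pick the next object to give to a particular worker.
 *
 * @author konrad
 */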
public class SchedulersForWorker {
/**
 * Scheduler for a single worker whose goal is to maximize the predicted cost
 * decrease of the chosen object, using the worker's confusion matrix.
 *
 * <p>The instance must be initialized through {@link #setProject} or
 * {@link #setInitializationData} before {@link #nextObjectForWorker} is called;
 * otherwise {@code results} and {@code costCalculator} are {@code null}.
 */
public static class ConfusionMatrixBased implements ISchedulerForWorker<String> {

    /** Maximum number of objects pulled from the iterator in one scheduling call. */
    protected int NUMBER_OF_FIRST_OBJECTS_TO_CHECK = 100;
    /** Source of per-worker and per-datum results (confusion matrices, category probabilities). */
    protected IResults<String, DatumResult, WorkerResult> results;
    /** Supplies the current cost (priority) of an object. */
    protected CostBasedPriorityCalculator costCalculator;

    /**
     * Picks, among the first {@code NUMBER_OF_FIRST_OBJECTS_TO_CHECK} objects of the
     * iterator, the one with the largest predicted cost reduction for this worker.
     *
     * <p>If there is no result for the worker yet, the first object of the iterator
     * is returned without any scoring.
     *
     * @param objects candidate objects; consumed by this call
     * @param worker worker the object is being selected for
     * @return the selected object, or {@code null} if no candidate was available
     */
    @Override
    public LObject<String> nextObjectForWorker(Iterator<LObject<String>> objects, Worker worker) {
        if (!results.hasWorkerResult(worker)) {
            return objects.hasNext() ? objects.next() : null;
        }
        WorkerResult wr = results.getWorkerResult(worker);
        int objectsLeftToCheck = NUMBER_OF_FIRST_OBJECTS_TO_CHECK;
        // Must start below every possible value. Double.MIN_VALUE is the smallest
        // POSITIVE double, so using it would reject every object whose reduction is
        // zero or negative and could return null although candidates exist.
        double maxCostReduction = Double.NEGATIVE_INFINITY;
        LObject<String> bestObject = null;
        while (objectsLeftToCheck-- > 0 && objects.hasNext()) {
            LObject<String> object = objects.next();
            double currentCost = costCalculator.getPriority(object);
            // Pruning: the reduction is currentCost minus the expected cost after
            // labeling, so (assuming that expected cost is never negative) it cannot
            // exceed currentCost itself.
            if (currentCost < maxCostReduction) {
                continue;
            }
            double expectedCostAfterLabeling = computeCostReduced(object, wr);
            double costReduction = currentCost - expectedCostAfterLabeling;
            if (costReduction > maxCostReduction) {
                bestObject = object;
                maxCostReduction = costReduction;
            }
        }
        return bestObject;
    }

    /**
     * Intended to estimate the expected cost of {@code object} after this worker
     * labels it. Despite the method name, the value returned is the expected NEW
     * cost; the caller subtracts it from the current cost to get the reduction.
     *
     * <p>NOTE: the implementation is unfinished. The accumulation step is still
     * commented out below, so this method currently always returns {@code 0}.
     *
     * @param object object whose datum result is looked up (created if missing)
     * @param wr result of the worker, providing the confusion matrix
     * @return expected cost after labeling; currently always {@code 0}
     */
    private double computeCostReduced(LObject<String> object, WorkerResult wr) {
        double expNewCost = 0;
        DatumResult dr = results.getOrCreateDatumResult(object);
        Collection<String> categories = dr.getCategoryProbabilites().keySet();
        // What is the probability that the datum is really in true_category?
        for (String true_category : categories) {
            double datumProb = dr.getCategoryProbability(true_category);
            // If the datum is in true_category, how would the worker label it?
            for (String predicted_category : categories) {
                // Probability that the worker assigns predicted_category to the datum
                double labelProb = wr.getConfusionMatrix().getNormalizedErrorRate(true_category, predicted_category);
                // TODO XXX FIXME - this involves algorithm :/
                // Assuming that the worker assigned predicted_category, estimate the
                // new cost of the example. Planned steps (not implemented yet):
                //   datum.addAssignedLabel(predicted_category);
                //   datum.updateObjectProbability();
                //   double costAfterLabeling = datum.calculateCost();
                //   expNewCost += datumProb * labelProb * costAfterLabeling;
                //   // clean up
                //   datum.removeAssignedLabel(predicted_category);
                //   datum.updateObjectProbability();
            }
        }
        return expNewCost;
    }

    /** @return the identifier of this scheduler, {@code Constants.FOR_WORKERS_CM_BASED} */
    @Override
    public String getId() {
        return Constants.FOR_WORKERS_CM_BASED;
    }

    /**
     * Sets the collaborators directly, without going through a {@link Project}.
     *
     * @param costCalculator calculator used to obtain the current cost of an object
     * @param results results used to look up worker and datum results
     */
    public void setInitializationData(CostBasedPriorityCalculator costCalculator,
            IResults<String, DatumResult, WorkerResult> results) {
        // Depending on the concrete calculator type is not clean, but it is easier in use.
        this.costCalculator = costCalculator;
        this.results = results;
    }

    /**
     * Initializes this scheduler from the project's scheduler and results.
     *
     * @param project project whose scheduler must already be set and must use a
     *        {@link CostBasedPriorityCalculator}
     * @throws IllegalArgumentException if the project has no scheduler yet, or its
     *         calculator is not a {@link CostBasedPriorityCalculator}
     */
    @Override
    public void setProject(Project<String, ?, ?, ?> project) {
        checkArgument(project.getScheduler() != null, "Wrong configuration order");
        // The calculator MUST be cost based: the original check was negated, which
        // rejected the valid configuration and let invalid ones hit the cast below.
        checkArgument(project.getScheduler().getCalculator() instanceof CostBasedPriorityCalculator,
                "This scheduler for worker works only with cost calculator");
        setInitializationData((CostBasedPriorityCalculator) project.getScheduler().getCalculator(),
                (IResults<String, DatumResult, WorkerResult>) project.getResults());
    }
}
/**
 * Scheduler for a single worker that hands out the first object of the iterator
 * for which the project's data reports no assign by that worker.
 *
 * <p>{@link #setProject} must be called before {@link #nextObjectForWorker}.
 *
 * @param <T> type parameter of the scheduled {@link LObject}s
 */
public static class FirstNotSeen<T> implements ISchedulerForWorker<T> {

    /** Project whose data is queried for existing assigns. */
    protected Project<T, ?, ?, ?> project;

    /**
     * Walks the iterator and returns the first object for which
     * {@code hasAssign(object, worker)} is {@code false}.
     *
     * @param objects_it candidate objects; consumed up to and including the returned one
     * @param worker worker the object is being selected for
     * @return the first matching object, or {@code null} when the iterator is exhausted
     */
    @Override
    public LObject<T> nextObjectForWorker(Iterator<LObject<T>> objects_it, Worker worker) {
        final IData<T> projectData = project.getData();
        while (objects_it.hasNext()) {
            final LObject<T> candidate = objects_it.next();
            if (!projectData.hasAssign(candidate, worker)) {
                return candidate;
            }
        }
        // Every remaining object already has an assign from this worker.
        return null;
    }

    /** @return the identifier of this scheduler, {@code Constants.FOR_WORKERS_FIRST_NOT_SEEN} */
    @Override
    public String getId() {
        return Constants.FOR_WORKERS_FIRST_NOT_SEEN;
    }

    /**
     * Stores the project whose data is consulted when scheduling.
     *
     * @param project project to read data from
     */
    @Override
    public void setProject(Project<T, ?, ?, ?> project) {
        this.project = project;
    }
}
}