package com.datascience.datastoring.jobs;
import com.datascience.core.algorithms.INewDataObserver;
import com.datascience.core.base.ContValue;
import com.datascience.core.base.IData;
import com.datascience.datastoring.datamodels.memory.InMemoryData;
import com.datascience.core.base.Project;
import com.datascience.datastoring.datamodels.memory.InMemoryNominalData;
import com.datascience.datastoring.datamodels.memory.InMemoryResults;
import com.datascience.core.nominal.CategoryValue;
import com.datascience.core.nominal.INominalData;
import com.datascience.core.nominal.NominalAlgorithm;
import com.datascience.core.nominal.NominalProject;
import com.datascience.core.results.*;
import com.datascience.gal.*;
import com.datascience.galc.ContinuousIpeirotis;
import com.datascience.galc.ContinuousProject;
import com.datascience.mv.BatchMV;
import com.datascience.mv.IncrementalMV;
import com.datascience.scheduler.SchedulerFactory;
import com.datascience.serialization.ISerializer;
import com.datascience.serialization.json.JSONUtils;
import com.datascience.utils.ClosestString;
import com.datascience.utils.CostMatrix;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.reflect.TypeToken;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import static com.datascience.serialization.json.JSONUtils.t;
import static com.google.common.base.Preconditions.checkArgument;
/**
* TODO XXX FIXME make separate factory for algorithms and use it here
* @author Konrad Kurdej
*/
public class JobFactory {
protected ISerializer serializer;
protected IJobStorage jobStorage;
protected ClosestString algNameMatcher;
public JobFactory(ISerializer serializer, IJobStorage jobStorage){
this.serializer = serializer;
this.jobStorage = jobStorage;
}
protected interface AlgorithmCreator {
NominalAlgorithm create(JsonObject jo);
}
protected interface JobCreator{
Job create(JsonObject jo, String id);
// Job create(String initializationData, String jsonData, String jsonResults, String model, String id);
}
final Map<String, AlgorithmCreator> ALG_FACTORY = new HashMap();
final Map<String, JobCreator> JOB_FACTORY = new HashMap();
{
AlgorithmCreator bds = new AlgorithmCreator() {
@Override
public NominalAlgorithm create(JsonObject jo) {
BatchDawidSkene alg = new BatchDawidSkene();
alg.setEpsilon(jo.has(Constants.EPSILON) ? jo.get(Constants.EPSILON).getAsDouble() : 1e-6);
alg.setIterations(jo.has(Constants.ITERATIONS) ? jo.get(Constants.ITERATIONS).getAsInt() : 10);
return alg;
}
};
AlgorithmCreator ids = new AlgorithmCreator() {
@Override
public NominalAlgorithm create(JsonObject jo) {
IncrementalDawidSkene alg = new IncrementalDawidSkene();
alg.setEpsilon(jo.has(Constants.EPSILON) ? jo.get(Constants.EPSILON).getAsDouble() : 1e-6);
alg.setIterations(jo.has(Constants.ITERATIONS) ? jo.get(Constants.ITERATIONS).getAsInt() : 10);
return alg;
}
};
AlgorithmCreator bmv = new AlgorithmCreator() {
@Override
public NominalAlgorithm create(JsonObject jo) {
return new BatchMV();
}
};
AlgorithmCreator imv = new AlgorithmCreator() {
@Override
public NominalAlgorithm create(JsonObject jo) {
return new IncrementalMV();
}
};
ALG_FACTORY.put(Constants.BDS, bds);
ALG_FACTORY.put(Constants.BLOCKING_EM, bds);
ALG_FACTORY.put(Constants.IDS, ids);
ALG_FACTORY.put(Constants.ONLINE_EM, ids);
ALG_FACTORY.put(Constants.BMV, bmv);
ALG_FACTORY.put(Constants.BLOCKING_MV, bmv);
ALG_FACTORY.put(Constants.IMV, imv);
ALG_FACTORY.put(Constants.ONLINE_MV, imv);
JobCreator nominal = new JobCreator(){
@Override
public Job create(JsonObject jo, String id){
return createNominalJob(jo, id);
}
};
JobCreator continuous = new JobCreator(){
@Override
public Job create(JsonObject jo, String id){
return createContinuousJob(jo, id);
}
};
JOB_FACTORY.put(Constants.NOMINAL, nominal);
JOB_FACTORY.put(Constants.CONTINUOUS, continuous);
JOB_FACTORY.put(Constants.GALC, continuous);
for (String s : ALG_FACTORY.keySet())
JOB_FACTORY.put(s, nominal);
algNameMatcher = new ClosestString(JOB_FACTORY.keySet());
}
protected <T> void handleSchedulerLoading(JsonObject settings, Project project){
if (settings.has(Constants.SCHEDULER))
project.setScheduler(new SchedulerFactory<T>().create(settings));
}
protected NominalProject getNominalProject(Collection<String> categories,
Collection<CategoryValue> categoryPriors,
CostMatrix<String> costMatrix,
String algorithm,
JsonObject jo,
String id){
checkArgument(ALG_FACTORY.containsKey(t(algorithm)), "Unknown Job algorithm: ", algorithm);
NominalAlgorithm na = ALG_FACTORY.get(t(algorithm)).create(jo);
INominalData data = jobStorage.getNominalData(id);
IResults<String, DatumResult, WorkerResult> results = jobStorage.getNominalResults(id, categories);
NominalProject np = new NominalProject(na, data, results);
if (na instanceof INewDataObserver) {
na.getData().addNewUpdatableAlgorithm((INewDataObserver) na);
}
na.setModel(jobStorage.getNominalModel(id, na.getModelType()));
np.initializeCategories(categories, categoryPriors, costMatrix);
this.<String>handleSchedulerLoading(jo, np);
np.setInitializationData(jo);
return np;
}
public Job createNominalJob(JsonObject jo, String id){
checkArgument(jo.has(Constants.CATEGORIES), "You should provide categories list");
Collection<String> categories = serializer.parse(jo.get(Constants.CATEGORIES).toString(), JSONUtils.stringSetType);
Collection<CategoryValue> categoryPriors = jo.has(Constants.CATEGORY_PRIORS) ?
(Collection<CategoryValue>) serializer.parse(jo.get(Constants.CATEGORY_PRIORS).toString(), JSONUtils.categoryValuesCollectionType) : null;
CostMatrix<String> costMatrix = jo.has(Constants.COST_MATRIX) ?
(CostMatrix<String>) serializer.parse(jo.get(Constants.COST_MATRIX).toString(), CostMatrix.class) : null;
return new Job(
getNominalProject(
categories,
categoryPriors,
costMatrix,
jo.get(Constants.ALGORITM).getAsString(),
jo,
id),
id);
}
public Job createContinuousJob(JsonObject jo, String id){
ContinuousIpeirotis alg = new ContinuousIpeirotis();
alg.setEpsilon(jo.has(Constants.EPSILON) ? jo.get(Constants.EPSILON).getAsDouble() : 1e-6);
alg.setIterations(jo.has(Constants.ITERATIONS) ? jo.get(Constants.ITERATIONS).getAsInt() : 10);
IData<ContValue> data = jobStorage.getContData(id);
IResults<ContValue, DatumContResults, WorkerContResults> results = jobStorage.getContResults(id);
ContinuousProject cp = new ContinuousProject(alg, data, results);
this.<ContValue>handleSchedulerLoading(jo, cp);
cp.setInitializationData(jo);
return new Job(cp, id);
}
public <T extends Project> Job<T> create(String type, String initializationData, String jsonData,
String jsonResults, String model, String id){
JsonObject jo = new JsonParser().parse(initializationData).getAsJsonObject();
Job<T> job = create(type, jo, id);
//TODO: add new metod to JobCreator
if (type.equals(NominalProject.kind)) {
InMemoryNominalData data = serializer.parse(jsonData, InMemoryNominalData.class);
job.getProject().setResults(serializer.<InMemoryResults<String, DatumResult, WorkerResult>>parse(jsonResults, new TypeToken<InMemoryResults<String, DatumResult, WorkerResult>>() {}.getType()));
job.getProject().setData(data);
((ResultsFactory.WorkerResultNominalFactory)((AbstractResults<String, DatumResult, WorkerResult>)job.getProject().getResults()).getWorkerResultsCreator()).setCategories(data.getCategories());
}
else {
job.getProject().setResults(serializer.<InMemoryResults<ContValue, DatumContResults, WorkerContResults>>parse(jsonResults, new TypeToken<InMemoryResults<ContValue, DatumContResults, WorkerContResults>>() {}.getType()));
job.getProject().setData(serializer.<InMemoryData<ContValue>>parse(jsonData, new TypeToken<InMemoryData<ContValue>>() {}.getType()));
}
job.getProject().getAlgorithm().setModel(serializer.parse(model, job.getProject().getAlgorithm().getModelType()));
handleSchedulerLoading(jo, job.getProject());
return job;
}
public <T extends Project> Job<T> create(String type, JsonObject initializationData, String id){
checkArgument(JOB_FACTORY.containsKey(t(type)), "Unknown algorithm type: [" + type + "]. Did you mean: [" + algNameMatcher.closest(t(type)) + "]");
return JOB_FACTORY.get(t(type)).create(initializationData, id);
}
}