package context.core.task.entitydetection;
import static context.app.AppConfig.getUserDirLoc;
import context.app.main.ContextFXController;
import context.core.entity.CTask;
import context.core.entity.CorpusData;
import context.core.entity.GenericTask;
import context.core.entity.TaskInstance;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import java.io.File;
import java.io.IOException;
import java.util.List;
import javafx.beans.property.DoubleProperty;
import javafx.beans.property.StringProperty;
import org.openide.util.Exceptions;
/**
 * Task that runs entity detection over the files of an input corpus.
 *
 * <p>Three serialized Stanford NER CRF models (3-class, 7-class and 4-class) are loaded once,
 * in the static initializer of this class, from {@code data/Classifiers} below the directory
 * returned by {@code AppConfig.getUserDirLoc()}. If loading fails, the exception is reported
 * through {@link Exceptions#printStackTrace(Throwable)} and the affected classifier fields stay
 * {@code null}; {@link #run(TaskInstance, GenericTask)} checks for that before using them.
 *
 * @author Aale
 */
public class EntityDetectionTask extends CTask {

    /** Location of the serialized classifier models, relative to the user directory. */
    private static final String CLASSIFIER_DIR = "/data/Classifiers/";

    // Shared by every task instance; null when the static load below failed.
    private static CRFClassifier classifier3;
    private static CRFClassifier classifier7;
    private static CRFClassifier classifier4;

    static {
        // The models are read from files on disk. They could alternatively be resolved from the
        // classpath (edu/stanford/nlp/models/ner/...) if the Stanford model jar were bundled.
        // All three loads share one try block: the first failure skips the remaining loads.
        try {
            ContextFXController.appendLog("Loading classifier... (it takes times for first usage of entity detection process, please wait)");
            classifier3 = loadClassifier("english.all.3class.distsim.crf.ser.gz");
            ContextFXController.appendLog("Classfier #1 loaded.");
            classifier7 = loadClassifier("english.muc.7class.distsim.crf.ser.gz");
            ContextFXController.appendLog("Classfier #2 loaded.");
            classifier4 = loadClassifier("english.conll.4class.distsim.crf.ser.gz");
            ContextFXController.appendLog("All classifiers loaded successfully.");
        } catch (IOException ex) {
            Exceptions.printStackTrace(ex);
        } catch (ClassCastException ex) {
            Exceptions.printStackTrace(ex);
        } catch (ClassNotFoundException ex) {
            Exceptions.printStackTrace(ex);
        }
    }

    /**
     * Loads one serialized CRF classifier from the classifier directory.
     *
     * @param fileName file name of the serialized model inside the classifier directory
     * @return the classifier returned by {@code CRFClassifier.getClassifier(File)}
     * @throws IOException if the model file cannot be read
     * @throws ClassNotFoundException if the serialized model references an unknown class
     */
    private static CRFClassifier loadClassifier(String fileName)
            throws IOException, ClassNotFoundException {
        File modelFile = new File(getUserDirLoc() + CLASSIFIER_DIR + fileName);
        return CRFClassifier.getClassifier(modelFile);
    }

    /**
     * Creates the task and passes both properties on to {@link CTask}.
     *
     * @param progress property handed to the superclass constructor
     * @param progressMessage property handed to the superclass constructor
     */
    public EntityDetectionTask(DoubleProperty progress, StringProperty progressMessage) {
        super(progress, progressMessage);
    }

    /**
     * Runs entity detection for the given task instance.
     *
     * <p>The files below the input corpus path are added to the corpus first. When the instance's
     * model is {@code 0} (Stanford NER), the three preloaded classifiers are attached to the
     * instance, detection is run through {@link EntityDetectionBody}, and the result is written
     * to the path of the instance's tabular output at index 0. For any other model value no
     * detection is performed, but the task is still reported as finished.
     *
     * @param instance the task instance; must be an {@code EntityDetectionTaskInstance} whose
     *     input is a {@code CorpusData}
     * @param task receiver of progress updates and of the completion signal
     * @return the processed instance on success; the unmodified {@code instance} argument when
     *     the classifiers are unavailable or detection reports an error (in both cases
     *     {@code task.done()} is not called)
     */
    @Override
    public TaskInstance run(TaskInstance instance, GenericTask task) {
        task.progress(1, 20, "Starting Entity Detection process");
        EntityDetectionTaskInstance ins = (EntityDetectionTaskInstance) instance;

        CorpusData inputCorpus = (CorpusData) ins.getInput();
        task.progress(3, 20, "Loading " + inputCorpus.getPath().get());
        inputCorpus.addAllFiles(new File(inputCorpus.getPath().get()));
        task.progress(5, 20, inputCorpus.getFiles().size() + " files loaded");

        if (ins.getModel() == 0) { // Stanford NER
            task.progress(8, 20, "Loading classifiers...");
            // The static initializer only prints a stack trace when loading fails, so the
            // fields may still be null here. Fail through the same path as a detection error
            // instead of letting a null classifier cause an NPE further down.
            if (classifier3 == null || classifier4 == null || classifier7 == null) {
                System.out.println("Error in detection: Stanford NER classifiers are not loaded");
                return instance;
            }
            ins.set3Classifier(classifier3);
            ins.set4Classifier(classifier4);
            ins.set7Classifier(classifier7);

            EntityDetectionBody edb = new EntityDetectionBody(ins);
            task.progress(10, 20, "Running Entity Detection...");
            if (!edb.detectEntities()) {
                System.out.println("Error in detection");
                return instance;
            }

            // Persist the detection result at the location of the first tabular output.
            final String path = ins.getTabularOutput(0).getPath().get();
            task.progress(14, 20, "Saving results in " + path);
            edb.writeOutput(path);
        }

        task.progress(20, 20, "Results saved successfully");
        task.done();
        return ins;
    }
}