package context.core.task.entitynetwork; import static context.app.AppConfig.getUserDirLoc; import java.io.File; import java.util.Properties; import context.app.main.ContextFXController; import context.core.entity.CTask; import context.core.entity.CorpusData; import context.core.entity.GenericTask; import context.core.entity.TaskInstance; import edu.stanford.nlp.ie.crf.CRFClassifier; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import java.io.IOException; import javafx.beans.property.DoubleProperty; import javafx.beans.property.StringProperty; import org.openide.util.Exceptions; /** * * @author Aale */ public class EntityNetworkTask extends CTask { private static CRFClassifier classifier3; private static CRFClassifier classifier7; private static CRFClassifier classifier4; private static StanfordCoreNLP pipeline; static { try { ContextFXController.appendLog("Loading classifier... (it takes times for first usage of entity detection process, please wait)"); File f3 = new File(getUserDirLoc() + "/data/Classifiers/english.all.3class.distsim.crf.ser.gz"); classifier3 = CRFClassifier.getClassifier(f3); File f7 = new File(getUserDirLoc() + "/data/Classifiers/english.muc.7class.distsim.crf.ser.gz"); classifier7 = CRFClassifier.getClassifier(f7); File f4 = new File(getUserDirLoc() + "/data/Classifiers/english.conll.4class.distsim.crf.ser.gz"); classifier4 = CRFClassifier.getClassifier(f4); // classifier3 = CRFClassifier.getClassifierNoExceptions("edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz"); // ContextFXController.appendLog("Classfier #1 loaded."); // classifier7 = CRFClassifier.getClassifierNoExceptions("edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz"); // ContextFXController.appendLog("Classfier #2 loaded."); // classifier4 = CRFClassifier.getClassifierNoExceptions("edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz"); ContextFXController.appendLog("All classifiers loaded successfully."); Properties props = new Properties(); props.put("annotators", "tokenize, ssplit"); pipeline = new StanfordCoreNLP(props); } catch (IOException ex) { Exceptions.printStackTrace(ex); } catch (ClassCastException ex) { Exceptions.printStackTrace(ex); } catch (ClassNotFoundException ex) { Exceptions.printStackTrace(ex); } } /** * * @param progress * @param progressMessage */ public EntityNetworkTask(DoubleProperty progress, StringProperty progressMessage) { super(progress, progressMessage); } /** * * @param instance * @param task * @return */ @Override public TaskInstance run(TaskInstance instance, GenericTask task) { System.out.println("starting corpus statistics application"); EntityNetworkTaskInstance ins = (EntityNetworkTaskInstance) instance; task.progress(3, 20, "Loading classifiers..."); ins.set3Classifier(classifier3); ins.set4Classifier(classifier4); ins.set7Classifier(classifier7); ins.setPipeline(pipeline); CorpusData inputCorpus = (CorpusData) ins.getInput(); inputCorpus.addAllFiles(new File(inputCorpus.getPath().get())); task.progress(5, 20, "Initializing Data Structures..."); //EntityNetworkBody enb = new EntityNetworkBody(ins); EntityCorpus ec = new EntityCorpus(ins); ec.setTabularOutPath(ins.getTabularOutput(0).getPath().get()); System.out.println("Files:" + inputCorpus.getFiles()); final String path = ins.getOutputDir(); System.out.println("Output Directory:" + path); //Run corpus statistics task.progress(7, 20, "Detecting Entities..."); /*if (!enb.detectEntities()) { System.out.println("Error in detection"); return instance; }*/ ec.genStreamsFromCorpus(); task.progress(13, 20, "Generating Network..."); /*if (!enb.genNetwork()) { System.out.println("Error in generation"); return instance; }*/ ec.saveNetworks(path); //Write the output to CSV // Need the selected output File path name !!! System.out.println("process done"); /* task.progress(16, 20, "Producing Gephi Files of Network..."); enb.extractGephiOutput(filpath);*/ task.progress(20, 20, "Done."); System.out.println("writing done"); return ins; } }