package com.cse10.gate; import gate.*; import gate.creole.ExecutionException; import gate.creole.ResourceInstantiationException; import gate.creole.SerialAnalyserController; import gate.util.GateException; import java.io.File; import java.net.MalformedURLException; import java.util.logging.Level; import java.util.logging.Logger; /** * pipe line which is used for document processing * Created by Chamath on 12/20/2014. */ public class CorpusPipeLine { private SerialAnalyserController serialAnalyserController; /** * add required processing resources and configure the pipe line * * @param isPOSRequired check whether part of speech tagging is required */ public void configure(boolean isPOSRequired) { try { //load the plugin ANNIE first to use resources under that try { Gate.getCreoleRegister().registerDirectories(new File(Gate.getPluginsHome(), "ANNIE").toURI().toURL()); } catch (GateException e) { e.printStackTrace(); } catch (MalformedURLException e) { e.printStackTrace(); } //create a application using the contorller serialAnalyserController = (SerialAnalyserController) Factory.createResource("gate.creole.SerialAnalyserController", Factory.newFeatureMap(), Factory.newFeatureMap(), "TestOne"); // load each processing resource FeatureMap params = Factory.newFeatureMap(); //create each processing resource and add to application ProcessingResource annotationDeletePR = (ProcessingResource) Factory.createResource("gate.creole.annotdelete.AnnotationDeletePR", params); ProcessingResource defaultTokeniser = (ProcessingResource) Factory.createResource("gate.creole.tokeniser.DefaultTokeniser", params); //if POS is required, then use default gazetter lists. if (!isPOSRequired) { try { params.put("listsURL", new File("Classifier\\src\\main\\resources\\gazetterLists\\lists.def").toURL()); } catch (MalformedURLException e) { e.printStackTrace(); } } ProcessingResource defaultGazetteer = (ProcessingResource) Factory.createResource("gate.creole.gazetteer.DefaultGazetteer", params); params.clear(); ProcessingResource sentenceSplitter = (ProcessingResource) Factory.createResource("gate.creole.splitter.SentenceSplitter", params); ProcessingResource posTagger = (ProcessingResource) Factory.createResource("gate.creole.POSTagger", params); ProcessingResource ANNIETransducer = (ProcessingResource) Factory.createResource("gate.creole.ANNIETransducer", params); serialAnalyserController.add(annotationDeletePR); serialAnalyserController.add(defaultTokeniser); serialAnalyserController.add(defaultGazetteer); if (isPOSRequired) { serialAnalyserController.add(sentenceSplitter); serialAnalyserController.add(posTagger); serialAnalyserController.add(ANNIETransducer); } } catch (ResourceInstantiationException ex) { Logger.getLogger(CorpusPipeLine.class.getName()).log(Level.SEVERE, null, ex); } } /** * set corpus for processing * * @param corpus */ public void setCorpus(Corpus corpus) { serialAnalyserController.setCorpus(corpus); } /** * execute pipeline * * @throws ExecutionException */ public void execute() throws ExecutionException { serialAnalyserController.execute(); } }