/**
*
*/
package com.maalaang.omtwitter.uima.pipeline;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.metadata.FixedFlow;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CasConsumerDescription;
import org.apache.uima.collection.CollectionProcessingManager;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.collection.EntityProcessStatus;
import org.apache.uima.collection.StatusCallbackListener;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.XMLInputSource;
import org.xml.sax.SAXException;
/**
* @author Sangwon Park
*
*/
public class OMTwitterFixedFlowPipeline {
private static final int ENTITY_CNT_FOR_LOG = 1000;
private static final int THREAD_WAIT_INTERVAL = 5000;
private CollectionReaderDescription readerDesc = null;
private AnalysisEngineDescription aggDesc = null;
private CollectionProcessingManager cpm = null;
private List<String> annotatorList = null;
private List<String> consumerList = null;
private Logger logger = null;
private long entityProcessCount = 0;
private boolean isProcessing = false;
public OMTwitterFixedFlowPipeline() {
aggDesc = UIMAFramework.getResourceSpecifierFactory().createAnalysisEngineDescription();
aggDesc.setPrimitive(false);
annotatorList = new LinkedList<String>();
consumerList = new LinkedList<String>();
logger = UIMAFramework.getLogger();
}
public void setReader(String name, InputStream descInputStream) throws IOException, InvalidXMLException {
setReader(name, new XMLInputSource(descInputStream, null));
}
public void setReader(String name, String descName) throws IOException, InvalidXMLException {
setReader(name, new XMLInputSource(getClass().getClassLoader().getResourceAsStream(descName), null));
}
private void setReader(String name, XMLInputSource in) throws IOException, InvalidXMLException {
readerDesc = UIMAFramework.getXMLParser().parseCollectionReaderDescription(in);
}
public void setReaderParameter(String name, String param, Object value) {
readerDesc.getMetaData().getConfigurationParameterSettings().setParameterValue(param, value);
}
public void setReaderParameter(String name, String group, String param, Object value) {
readerDesc.getMetaData().getConfigurationParameterSettings().setParameterValue(group, param, value);
}
public void addAnnotator(String name, InputStream descInputStream) throws IOException, InvalidXMLException {
addAnnotator(name, new XMLInputSource(descInputStream, null));
}
public void addAnnotator(String name, String descName) throws IOException, InvalidXMLException {
addAnnotator(name, new XMLInputSource(getClass().getClassLoader().getResourceAsStream(descName), null));
}
private void addAnnotator(String name, XMLInputSource in) throws IOException, InvalidXMLException {
AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(in);
aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put(name, desc);
annotatorList.add(name);
}
public void setAnnotatorParameter(String name, String param, Object value) {
AnalysisEngineDescription desc = (AnalysisEngineDescription) aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().get(name);
desc.getMetaData().getConfigurationParameterSettings().setParameterValue(param, value);
}
public void setAnnotatorParameter(String name, String group, String param, Object value) {
AnalysisEngineDescription desc = (AnalysisEngineDescription) aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().get(name);
desc.getMetaData().getConfigurationParameterSettings().setParameterValue(group, param, value);
}
public void addConsumer(String name, InputStream descInputStream) throws IOException, InvalidXMLException {
addConsumer(name, new XMLInputSource(descInputStream, null));
}
public void addConsumer(String name, String descName) throws IOException, InvalidXMLException {
addConsumer(name, new XMLInputSource(getClass().getClassLoader().getResourceAsStream(descName), null));
}
private void addConsumer(String name, XMLInputSource in) throws IOException, InvalidXMLException {
CasConsumerDescription desc = UIMAFramework.getXMLParser().parseCasConsumerDescription(in);
aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put(name, desc);
consumerList.add(name);
}
public void setConsumerParameter(String name, String param, Object value) {
CasConsumerDescription desc = (CasConsumerDescription) aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().get(name);
desc.getMetaData().getConfigurationParameterSettings().setParameterValue(param, value);
}
public void setConsumerParameter(String name, String group, String param, Object value) {
CasConsumerDescription desc = (CasConsumerDescription) aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().get(name);
desc.getMetaData().getConfigurationParameterSettings().setParameterValue(group, param, value);
}
public void run(boolean wait) throws ResourceConfigurationException, ResourceInitializationException {
run(wait, this.getClass().getSimpleName() + ".xml");
}
public void run(boolean wait, String aggDescFile) throws ResourceConfigurationException, ResourceInitializationException {
FixedFlow flow = UIMAFramework.getResourceSpecifierFactory().createFixedFlow();
LinkedList<String> list = new LinkedList<String>();
list.addAll(annotatorList);
list.addAll(consumerList);
flow.setFixedFlow(list.toArray(new String[annotatorList.size() + consumerList.size()]));
aggDesc.getAnalysisEngineMetaData().setName(this.getClass().getSimpleName());
aggDesc.getAnalysisEngineMetaData().setFlowConstraints(flow);
aggDesc.getAnalysisEngineMetaData().getOperationalProperties().setMultipleDeploymentAllowed(false);
if (aggDescFile != null) {
try {
aggDesc.toXML(new OutputStreamWriter(new FileOutputStream(aggDescFile), "UTF-8"));
logger.log(Level.INFO, "description file for the pipeline was created - " + aggDescFile);
} catch (UnsupportedEncodingException e) {
logger.log(Level.WARNING, e.getMessage());
} catch (FileNotFoundException e) {
logger.log(Level.WARNING, e.getMessage());
} catch (SAXException e) {
logger.log(Level.WARNING, e.getMessage());
} catch (IOException e) {
logger.log(Level.WARNING, e.getMessage());
}
}
isProcessing = true;
cpm = UIMAFramework.newCollectionProcessingManager();
cpm.setAnalysisEngine(UIMAFramework.produceAnalysisEngine(aggDesc));
cpm.process(UIMAFramework.produceCollectionReader(readerDesc));
cpm.addStatusCallbackListener(new StatusCallbackListener() {
public void resumed() {
logger.log(Level.INFO, "CPM resumed");
}
public void paused() {
logger.log(Level.INFO, "CPM paused");
}
public void initializationComplete() {
logger.log(Level.INFO, "CPM initialization completed");
}
public void collectionProcessComplete() {
logger.log(Level.INFO, "CPM processing completed");
isProcessing = false;
}
public void batchProcessComplete() {
logger.log(Level.INFO, "CPM batch process completed");
}
public void aborted() {
logger.log(Level.SEVERE, "CPM aborted");
isProcessing = false;
}
public void entityProcessComplete(CAS arg0, EntityProcessStatus arg1) {
entityProcessCount++;
if (entityProcessCount % ENTITY_CNT_FOR_LOG == 0) {
logger.log(Level.INFO, "CPM entity process completed - " + entityProcessCount + " entities");
}
}
});
while (wait && isProcessing) {
try {
Thread.sleep(THREAD_WAIT_INTERVAL);
} catch (InterruptedException e) {
logger.log(Level.SEVERE, "sleep interrupted");
}
}
}
}