package edu.isistan.uima.unified.analysisengines.opennlp;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import opennlp.tools.chunker.Chunker;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.ExternalResource;
import edu.isistan.uima.unified.analysisengines.AnnotationGenerator;
import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource;
import edu.isistan.uima.unified.typesystems.nlp.Sentence;
import edu.isistan.uima.unified.typesystems.nlp.Token;
public class ChunkAnnotator extends JCasAnnotator_ImplBase {
@ConfigurationParameter(name="model")
private String modelName;
//
protected ChunkerModel model;
protected Chunker chunker;
//
@ExternalResource(key="monitor")
private ProgressMonitorResource monitorResource;
private IProgressMonitor subMonitor;
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
super.initialize(aContext);
InputStream in = null;
try {
//modelName = (String) aContext.getConfigParameterValue("model");
in = new FileInputStream(modelName);
model = new ChunkerModel(in);
chunker = new ChunkerME(model);
}
catch (Exception e) {
e.printStackTrace();
}
finally {
if (in != null) {
try {
in.close();
}
catch (IOException e) {
}
}
}
}
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
if(chunker == null)
return;
//
subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK);
subMonitor.subTask("Annotating chunks (OpenNLP)");
//
//String docText = aJCas.getDocumentText();
AnnotationIndex<Annotation> sAnnotations = aJCas.getAnnotationIndex(Sentence.type);
AnnotationIndex<Annotation> tAnnotations = aJCas.getAnnotationIndex(Token.type);
//
subMonitor.beginTask(this.getClass().getSimpleName(), sAnnotations.size());
//
for(Annotation sAnnotation : sAnnotations) {
//Sentence sentenceAnnotation = (Sentence) sAnnotation;
//String sentence = sAnnotation.getCoveredText();
Iterator<Annotation> tokenIterator = tAnnotations.subiterator(sAnnotation);
List<Token> tokenList = new LinkedList<Token>();
while(tokenIterator.hasNext()) {
Annotation tAnnotation = tokenIterator.next();
tokenList.add((Token)tAnnotation);
}
Token[] tokenAnnotations = new Token[tokenList.size()];
for(int i = 0; i < tokenList.size(); i++)
tokenAnnotations[i] = tokenList.get(i);
String[] tokens = new String[tokenAnnotations.length];
for(int i = 0; i < tokenAnnotations.length; i++)
tokens[i] = tokenAnnotations[i].getCoveredText();
String[] pos = new String[tokenAnnotations.length];
for(int i = 0; i < tokenAnnotations.length; i++)
pos[i] = tokenAnnotations[i].getPos();
String[] chunks = chunker.chunk(tokens, pos);
boolean chunkStarted = false;
int chunkBegin = -1, chunkEnd = -1;
String chunkTag = "";
int chunkNumber = 0;
while(chunkNumber < chunks.length) {
Token tokenAnnotation = tokenAnnotations[chunkNumber];
//String token = tokens[chunkNumber];
String chunk = chunks[chunkNumber];
if(!chunkStarted) {
if(chunk.startsWith("B")) {
chunkStarted = true;
chunkBegin = tokenAnnotation.getBegin();
chunkEnd = tokenAnnotation.getEnd();
chunkTag = chunk.substring(2);
}
else {
chunkBegin = tokenAnnotation.getBegin();
chunkEnd = tokenAnnotation.getEnd();
chunkTag = chunk;
AnnotationGenerator.generateChunk(chunkBegin, chunkEnd, chunkTag, aJCas);
chunkBegin = -1; chunkEnd = -1;
}
chunkNumber++;
}
else {
if(chunk.startsWith("B") || chunk.startsWith("O")) {
chunkStarted = false;
AnnotationGenerator.generateChunk(chunkBegin, chunkEnd, chunkTag, aJCas);
chunkBegin = -1; chunkEnd = -1;
}
else {
chunkEnd = tokenAnnotation.getEnd();
chunkNumber++;
}
}
}
//
subMonitor.worked(1);
}
//
subMonitor.done();
}
@Override
public void destroy() {
model = null;
chunker = null;
super.destroy();
}
}