package edu.isistan.uima.unified.analysisengines.matetools;
import is2.data.SentenceData09;
import is2.parser.Parser;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.ExternalResource;
import edu.isistan.uima.unified.analysisengines.AnnotationGenerator;
import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource;
import edu.isistan.uima.unified.typesystems.nlp.Sentence;
import edu.isistan.uima.unified.typesystems.nlp.Token;
/**
 * UIMA annotator that runs the Mate-tools dependency {@link Parser} over every
 * {@link Sentence} of a CAS and records one CoNLL dependency annotation per
 * {@link Token} through {@link AnnotationGenerator#generateCoNLLDependency}.
 *
 * <p>The tokens are expected to already carry lemma, POS and (optionally)
 * morphology values, since those are the features handed to the parser.
 *
 * <p>If the parser model cannot be loaded during {@link #initialize}, the
 * failure is reported on standard error and {@link #process} becomes a no-op.
 */
public class CoNLLDependencyAnnotator extends JCasAnnotator_ImplBase {
    /** Placeholder form and lemma of the artificial root node at index 0. */
    private static final String ROOT_FORM = "<ROOT>";
    /** Placeholder POS tag of the artificial root node. */
    private static final String ROOT_POS = "<ROOT-POS>";
    /** Placeholder morphological features of the artificial root node. */
    private static final String ROOT_MORPH = "<ROOT-PFEAT>";
    /** Dependency label assigned to tokens whose head is the artificial root. */
    private static final String ROOT_DEPREL = "ROOT";

    /** Value of the "model" configuration parameter, passed to the {@link Parser} constructor. */
    @ConfigurationParameter(name="model")
    private String modelName;

    /** Parser instance; stays {@code null} when the model could not be loaded. */
    private Parser parser;

    /** Shared resource that provides the parent progress monitor. */
    @ExternalResource(key="monitor")
    private ProgressMonitorResource monitorResource;
    /** Sub-monitor created for the current {@link #process} call. */
    private IProgressMonitor subMonitor;

    /**
     * Initializes the annotator and tries to create the parser from {@code modelName}.
     * A failure to create the parser is reported but not propagated, so the
     * pipeline keeps running and this annotator simply does nothing.
     *
     * @param aContext the UIMA context supplied by the framework
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        try {
            parser = new Parser(modelName);
        }
        catch (Exception e) {
            // Best effort: process() checks for a null parser and skips its work.
            parser = null;
            System.err.println(getClass().getSimpleName()
                    + ": could not load parser model '" + modelName
                    + "'; dependency annotation is disabled");
            e.printStackTrace();
        }
    }

    /**
     * Parses every sentence of the CAS and generates one dependency annotation
     * per token. Sentences that cover no token are skipped. Does nothing when
     * the parser is unavailable.
     *
     * @param aJCas the CAS holding the {@link Sentence} and {@link Token} annotations
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        if (parser == null) {
            return;
        }

        subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK);
        subMonitor.subTask("Annotating CoNLL dependencies (Matetools)");

        AnnotationIndex<Annotation> sAnnotations = aJCas.getAnnotationIndex(Sentence.type);
        AnnotationIndex<Annotation> tAnnotations = aJCas.getAnnotationIndex(Token.type);

        subMonitor.beginTask(this.getClass().getSimpleName(), sAnnotations.size());
        try {
            for (Annotation sAnnotation : sAnnotations) {
                List<Token> tokens = collectTokens(tAnnotations, sAnnotation);
                // A sentence without tokens has nothing to parse; skipping it also
                // avoids indexing into empty feature arrays.
                if (!tokens.isEmpty()) {
                    SentenceData09 sentence = buildSentence(tokens);
                    // The result is read back from the same object, as the fields
                    // heads/labels are consulted on 'sentence' afterwards.
                    parser.apply(sentence);
                    annotateDependencies(sentence, tokens, aJCas);
                }
                subMonitor.worked(1);
            }
        }
        finally {
            // Always close the sub-monitor, even if parsing or annotation fails.
            subMonitor.done();
        }
    }

    /**
     * Releases the parser reference before delegating to the superclass.
     */
    @Override
    public void destroy() {
        parser = null;
        super.destroy();
    }

    /** Collects the tokens returned by the token index sub-iterator for the given sentence. */
    private List<Token> collectTokens(AnnotationIndex<Annotation> tokenIndex, Annotation sentence) {
        // ArrayList gives constant-time get(i), which the later index lookups rely on.
        List<Token> tokens = new ArrayList<Token>();
        Iterator<Annotation> tokenIterator = tokenIndex.subiterator(sentence);
        while (tokenIterator.hasNext()) {
            tokens.add((Token) tokenIterator.next());
        }
        return tokens;
    }

    /** Builds the parser input for a non-empty token list, with the artificial root at index 0. */
    private SentenceData09 buildSentence(List<Token> tokens) {
        int size = tokens.size() + 1;
        String[] forms = new String[size];
        String[] lemmas = new String[size];
        String[] poss = new String[size];
        String[] morphs = new String[size];

        forms[0] = ROOT_FORM;
        lemmas[0] = ROOT_FORM;
        poss[0] = ROOT_POS;
        morphs[0] = ROOT_MORPH;
        for (int i = 0; i < tokens.size(); i++) {
            Token token = tokens.get(i);
            forms[i + 1] = token.getCoveredText();
            lemmas[i + 1] = token.getLemma();
            poss[i + 1] = token.getPos();
            morphs[i + 1] = token.getMorph();
        }

        SentenceData09 sentence = new SentenceData09();
        sentence.init(forms);
        sentence.lemmas = lemmas;
        sentence.ppos = poss;
        // The first real token decides whether morphology is available; when it
        // is missing, the POS tags are reused as the feature column.
        String firstMorph = morphs[1];
        if (firstMorph != null && !firstMorph.isEmpty()) {
            sentence.pfeats = morphs;
        }
        else {
            sentence.pfeats = sentence.ppos.clone();
        }
        return sentence;
    }

    /** Generates one dependency annotation per token from the parsed heads and labels. */
    private void annotateDependencies(SentenceData09 sentence, List<Token> tokens, JCas aJCas) {
        for (int tokenNumber = 0; tokenNumber < tokens.size(); tokenNumber++) {
            Token target = tokens.get(tokenNumber);
            // Parser indices are shifted by one because index 0 is the artificial root.
            int head = sentence.heads[tokenNumber + 1];

            Token source;
            String deprel;
            int begin;
            int end;
            if (head != 0) {
                source = tokens.get(head - 1);
                deprel = sentence.labels[tokenNumber + 1];
                // The annotation spans from the earlier begin to the later end of both tokens.
                begin = Math.min(target.getBegin(), source.getBegin());
                end = Math.max(target.getEnd(), source.getEnd());
            }
            else {
                // Head 0 is the artificial root: no source token, span of the target only.
                source = null;
                deprel = ROOT_DEPREL;
                begin = target.getBegin();
                end = target.getEnd();
            }
            AnnotationGenerator.generateCoNLLDependency(begin, end, deprel, source, target, aJCas);
        }
    }
}