package edu.isistan.uima.unified.analysisengines.stanfordnlp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.ExternalResource;
import edu.isistan.uima.unified.analysisengines.AnnotationGenerator;
import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource;
import edu.isistan.uima.unified.typesystems.nlp.Sentence;
import edu.isistan.uima.unified.typesystems.nlp.Token;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
/**
 * UIMA annotator that parses every {@link Sentence} of a CAS with the Stanford
 * lexicalized parser and records the collapsed typed dependencies between the
 * sentence's {@link Token}s through
 * {@code AnnotationGenerator.generateSDDependency}.
 * <p>
 * The parser model is loaded from the {@code model} configuration parameter.
 * If loading fails the failure is logged and the annotator stays disabled:
 * {@link #process(JCas)} then returns without touching the CAS.
 */
public class SDDependencyAnnotator extends JCasAnnotator_ImplBase {
    /** Location of the serialized parser model handed to {@link LexicalizedParser}. */
    @ConfigurationParameter(name="model")
    private String modelName;
    /** Parser instance; {@code null} when the model could not be loaded or after {@link #destroy()}. */
    private LexicalizedParser lp;

    /** Shared resource that exposes the progress monitor of the enclosing job. */
    @ExternalResource(key="monitor")
    private ProgressMonitorResource monitorResource;
    /** Sub-monitor created anew for each processed CAS. */
    private IProgressMonitor subMonitor;

    /**
     * Loads the parser model named by the {@code model} parameter.
     * A loading failure is logged instead of propagated, so the pipeline keeps
     * running with this annotator disabled.
     *
     * @param aContext the UIMA context of this component
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        try {
            lp = new LexicalizedParser(modelName);
        } catch (Exception e) {
            // Best effort: keep the annotator disabled rather than aborting the pipeline.
            lp = null;
            aContext.getLogger().log(Level.SEVERE,
                    "Could not load Stanford parser model '" + modelName
                    + "'; Stanford dependency annotation is disabled", e);
        }
    }

    /**
     * Parses each sentence of the CAS and creates one dependency annotation per
     * collapsed typed dependency whose governor and dependent both map to tokens
     * of that sentence. One unit of progress is reported per sentence.
     *
     * @param aJCas the CAS to annotate
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        if (lp == null) {
            return;
        }

        subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK);
        subMonitor.subTask("Annotating Stanford dependencies (Stanford)");

        AnnotationIndex<Annotation> sAnnotations = aJCas.getAnnotationIndex(Sentence.type);
        AnnotationIndex<Annotation> tAnnotations = aJCas.getAnnotationIndex(Token.type);

        // The factory does not depend on the sentence, so build it once per CAS.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();

        subMonitor.beginTask(this.getClass().getSimpleName(), sAnnotations.size());
        try {
            for (Annotation sAnnotation : sAnnotations) {
                Iterator<Annotation> tokenIterator = tAnnotations.subiterator(sAnnotation);
                annotateSentence(aJCas, tokenIterator, gsf);
                subMonitor.worked(1);
            }
        } finally {
            // Always close the monitor, even when parsing or annotation fails.
            subMonitor.done();
        }
    }

    /** Parses the tokens of one sentence and stores its collapsed typed dependencies in the CAS. */
    private void annotateSentence(JCas aJCas, Iterator<Annotation> tokenIterator, GrammaticalStructureFactory gsf) {
        // ArrayList: tokens are looked up by position for every dependency below.
        List<Token> tokenList = new ArrayList<Token>();
        List<String> wordList = new ArrayList<String>();
        List<String> postagsList = new ArrayList<String>();
        while (tokenIterator.hasNext()) {
            Token token = (Token) tokenIterator.next();
            tokenList.add(token);
            wordList.add(token.getCoveredText());
            postagsList.add(token.getPos());
        }
        if (tokenList.isEmpty()) {
            // Nothing to parse; a sentence without tokens cannot yield dependencies.
            return;
        }

        ArrayList<? extends HasWord> sentence = edu.stanford.nlp.ling.Sentence.toTaggedList(wordList, postagsList);
        if (!lp.parse(sentence)) {
            return;
        }

        Tree parse = (Tree) lp.getBestParse();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
        Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
        for (TypedDependency td : tdl) {
            // Parser word indices are 1-based; token list positions are 0-based.
            int govIndex = td.gov().index() - 1;
            int depIndex = td.dep().index() - 1;
            if (!isTokenIndex(govIndex, tokenList) || !isTokenIndex(depIndex, tokenList)) {
                // E.g. a dependency attached to an artificial root node (index 0):
                // there is no token to anchor it on, so it is skipped.
                continue;
            }
            Token gov = tokenList.get(govIndex);
            Token dep = tokenList.get(depIndex);
            // Fixed locale so relation names do not vary with the JVM default locale.
            String relation = td.reln().getShortName().toUpperCase(Locale.ENGLISH);
            // The annotation spans from the leftmost begin to the rightmost end of both tokens.
            int begin = Math.min(gov.getBegin(), dep.getBegin());
            int end = Math.max(gov.getEnd(), dep.getEnd());
            AnnotationGenerator.generateSDDependency(begin, end, relation, gov, dep, aJCas);
        }
    }

    /** Tells whether {@code index} is a valid position in {@code tokens}. */
    private static boolean isTokenIndex(int index, List<Token> tokens) {
        return index >= 0 && index < tokens.size();
    }

    /** Releases the parser reference before delegating to the superclass. */
    @Override
    public void destroy() {
        lp = null;
        super.destroy();
    }
}