package edu.isistan.uima.unified.analysisengines.stanfordnlp; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.text.AnnotationIndex; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.apache.uima.resource.ResourceInitializationException; import org.eclipse.core.runtime.IProgressMonitor; import org.eclipse.core.runtime.SubProgressMonitor; import org.uimafit.component.JCasAnnotator_ImplBase; import org.uimafit.descriptor.ConfigurationParameter; import org.uimafit.descriptor.ExternalResource; import edu.isistan.uima.unified.analysisengines.AnnotationGenerator; import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource; import edu.isistan.uima.unified.typesystems.nlp.Sentence; import edu.isistan.uima.unified.typesystems.nlp.Token; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.parser.lexparser.LexicalizedParser; import edu.stanford.nlp.trees.GrammaticalStructure; import edu.stanford.nlp.trees.GrammaticalStructureFactory; import edu.stanford.nlp.trees.PennTreebankLanguagePack; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreebankLanguagePack; import edu.stanford.nlp.trees.TypedDependency; public class SDDependencyAnnotator extends JCasAnnotator_ImplBase { @ConfigurationParameter(name="model") private String modelName; private LexicalizedParser lp; // @ExternalResource(key="monitor") private ProgressMonitorResource monitorResource; private IProgressMonitor subMonitor; @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); try { //modelName = (String) aContext.getConfigParameterValue("model"); lp = new LexicalizedParser(modelName); //lp.setOptionFlags(new String[] { "-maxLength", "80", "-retainTmpSubcategories" } ); } catch (Exception e) { e.printStackTrace(); } } @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { if(lp == null) return; // subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK); subMonitor.subTask("Annotating Stanford dependencies (Stanford)"); // //String docText = aJCas.getDocumentText(); AnnotationIndex<Annotation> sAnnotations = aJCas.getAnnotationIndex(Sentence.type); AnnotationIndex<Annotation> tAnnotations = aJCas.getAnnotationIndex(Token.type); // subMonitor.beginTask(this.getClass().getSimpleName(), sAnnotations.size()); // for(Annotation sAnnotation : sAnnotations) { //Sentence sentenceAnnotation = (Sentence) sAnnotation; //String sentence = sAnnotation.getCoveredText(); Iterator<Annotation> tokenIterator = tAnnotations.subiterator(sAnnotation); List<Token> tokenList = new LinkedList<Token>(); List<String> wordList = new LinkedList<String>(); List<String> postagsList = new LinkedList<String>(); while(tokenIterator.hasNext()) { Annotation tAnnotation = tokenIterator.next(); Token token = (Token) tAnnotation; tokenList.add(token); wordList.add(token.getCoveredText()); postagsList.add(token.getPos()); } ArrayList<? extends HasWord> sentence = edu.stanford.nlp.ling.Sentence.toTaggedList(wordList, postagsList); if(lp.parse(sentence)) { Tree parse = (Tree) lp.getBestParse(); TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed(); for(TypedDependency td : tdl) { String relation = td.reln().getShortName().toUpperCase(); int govIndex = td.gov().index() - 1; int depIndex = td.dep().index() - 1; Token gov = tokenList.get(govIndex); Token dep = tokenList.get(depIndex); int begin = gov.getBegin() < dep.getBegin() ? gov.getBegin() : dep.getBegin(); int end = gov.getEnd() > dep.getEnd() ? gov.getEnd() : dep.getEnd(); AnnotationGenerator.generateSDDependency(begin, end, relation, gov, dep, aJCas); } } // subMonitor.worked(1); } // subMonitor.done(); } @Override public void destroy() { lp = null; super.destroy(); } }