package de.berlin.hu.uima.util; import opennlp.uima.Sentence; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import org.uimafit.util.JCasUtil; import de.berlin.hu.types.PubmedDocument; import java.util.ArrayList; import java.util.Iterator; public class OpenNLPToUCompareSentenceConverterAE extends JCasAnnotator_ImplBase { @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { for (PubmedDocument document : JCasUtil.iterate(aJCas, PubmedDocument.class)) { Iterator<Sentence> sentenceIterator = JCasUtil.selectCovered(aJCas, Sentence.class, document).iterator(); ArrayList<Sentence> sentencesToRemove = new ArrayList<Sentence>(); if (!sentenceIterator.hasNext()) { //System.out.println("Document does not have any sentences. Adding sentence for entire document."); org.u_compare.shared.syntactic.Sentence ucompareSentence = new org.u_compare.shared.syntactic.Sentence(aJCas); ucompareSentence.setBegin(0); ucompareSentence.setEnd(aJCas.getDocumentText().length()); ucompareSentence.addToIndexes(); } Sentence sentence = null; while (sentenceIterator.hasNext()) { sentence = sentenceIterator.next(); org.u_compare.shared.syntactic.Sentence ucompareSentence = new org.u_compare.shared.syntactic.Sentence(aJCas); ucompareSentence.setBegin(sentence.getBegin()); ucompareSentence.setEnd(sentence.getEnd()); ucompareSentence.addToIndexes(); sentencesToRemove.add(sentence); } if (sentence != null && sentence.getEnd() < document.getEnd() && !document.getCoveredText().substring(sentence.getEnd()+1 - document.getBegin()).matches("\\s*")) { //System.out.println("Sentences do not cover entire document. Adding sentence add the end."); org.u_compare.shared.syntactic.Sentence ucompareSentence = new org.u_compare.shared.syntactic.Sentence(aJCas); ucompareSentence.setBegin(sentence.getEnd()+1); ucompareSentence.setEnd(aJCas.getDocumentText().length()); ucompareSentence.addToIndexes(); } for (Sentence sentence2 : sentencesToRemove) { sentence2.removeFromIndexes(aJCas); } } } }