/** * */ package com.maalaang.omtwitter.uima.annotator; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import org.apache.uima.UimaContext; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.util.Level; import org.apache.uima.util.Logger; import com.maalaang.omtwitter.uima.type.SentenceAnnotation; import com.maalaang.omtwitter.uima.type.TokenAnnotation; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.TaggedWord; import edu.stanford.nlp.tagger.maxent.MaxentTagger; /** * @author Sangwon Park * */ public class StanfordPosAnnotator extends JCasAnnotator_ImplBase { private MaxentTagger tagger = null; private Logger logger = null; @Override public void process(JCas jcas) throws AnalysisEngineProcessException { List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(jcas.getDocumentText())); for (List<HasWord> sentence : sentences) { ArrayList<TaggedWord> taggedSentence = tagger.tagSentence(sentence); SentenceAnnotation sentenceAnn = new SentenceAnnotation(jcas); sentenceAnn.setBegin(taggedSentence.get(0).beginPosition()); sentenceAnn.setEnd(taggedSentence.get(taggedSentence.size() - 1).endPosition()); sentenceAnn.addToIndexes(); for (TaggedWord word : taggedSentence) { TokenAnnotation tokenAnn = new TokenAnnotation(jcas); tokenAnn.setBegin(word.beginPosition()); tokenAnn.setEnd(word.endPosition()); tokenAnn.setPosTag(word.tag()); tokenAnn.addToIndexes(); } } } @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); this.logger = aContext.getLogger(); try { tagger = new MaxentTagger(MaxentTagger.DEFAULT_JAR_PATH); } catch (ClassNotFoundException e) { throw new ResourceInitializationException(e); } catch (IOException e) { throw new ResourceInitializationException(e); } this.logger.log(Level.INFO, "StanfordPosAnnotator initialized"); } }