/**
*
*/
package com.maalaang.omtwitter.uima.annotator;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import com.maalaang.omtwitter.resource.SentimentScore;
import com.maalaang.omtwitter.resource.SentimentScoreDictionary;
import com.maalaang.omtwitter.resource.SentimentScoreDictionaryFactory;
import com.maalaang.omtwitter.text.OMTweetToken;
import com.maalaang.omtwitter.text.OMTweetTokenizer;
import com.maalaang.omtwitter.uima.type.TwitterSentiCorpusAnnotation;
/**
* @author Sangwon Park
*
*/
public class TwitterSentimentScoreAnnotator extends JCasAnnotator_ImplBase {
private Logger logger = null;
private final static String PARAM_SENTI_SCORE_DIC_OBJ_FILE = "sentiScoreDicObjectFile";
private SentimentScoreDictionary sentiScoreDic = null;
private OMTweetTokenizer tweetTokenizer = null;
/* (non-Javadoc)
* @see org.apache.uima.analysis_component.AnalysisComponent_ImplBase#initialize(org.apache.uima.UimaContext)
*/
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
super.initialize(aContext);
logger = aContext.getLogger();
tweetTokenizer = new OMTweetTokenizer();
try {
String dicFile = (String)aContext.getConfigParameterValue(PARAM_SENTI_SCORE_DIC_OBJ_FILE);
InputStream is = getClass().getClassLoader().getResourceAsStream(dicFile);
if (is == null) {
is = new FileInputStream(dicFile);
}
sentiScoreDic = SentimentScoreDictionaryFactory.loadFromSerializedFile(is);
} catch (FileNotFoundException e) {
logger.log(Level.SEVERE, e.getMessage());
throw new ResourceInitializationException(e);
} catch (ClassNotFoundException e) {
logger.log(Level.SEVERE, e.getMessage());
throw new ResourceInitializationException(e);
} catch (IOException e) {
logger.log(Level.SEVERE, e.getMessage());
throw new ResourceInitializationException(e);
}
logger.log(Level.INFO, "twitter sentiment score annotator initialized");
}
/* (non-Javadoc)
* @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
*/
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
OMTweetToken[] tokenList = tweetTokenizer.tokenize(aJCas.getDocumentText());
for (OMTweetToken tok : tokenList) {
SentimentScore score = sentiScoreDic.find(tok.getNormalizedText());
if (score != null) {
TwitterSentiCorpusAnnotation ann = new TwitterSentiCorpusAnnotation(aJCas, tok.getBegin(), tok.getEnd());
ann.setId(score.getId());
ann.setPositiveScore(score.getPositiveScore());
ann.setNegativeScore(score.getNegativeScore());
ann.setSubjectiveScore(score.getSubjectiveScore());
ann.setObjectiveScore(score.getObjectiveScore());
ann.addToIndexes();
}
}
}
}