/**
*
*/
package com.maalaang.omtwitter.uima.ml;
import java.util.HashSet;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import com.maalaang.omtwitter.ml.SvmFeatureVector;
import com.maalaang.omtwitter.ml.SvmFeatureVector_Impl;
import com.maalaang.omtwitter.uima.type.NegationAnnotation;
import com.maalaang.omtwitter.uima.type.SentiWordNetAnnotation;
import com.maalaang.omtwitter.uima.type.TwitterSentiCorpusAnnotation;
/**
 * {@link SvmFVFactory} that turns the sentiment annotations of a CAS into an
 * SVM feature vector made of two aggregate score features plus one presence
 * feature per Twitter-sentiment-corpus annotation.
 *
 * @author Sangwon Park
 */
public class SvmScoreSumUnigramExFVFactory implements SvmFVFactory {

    /**
     * Creates the feature vector for the document held by the given CAS.
     *
     * <p>Layout of the produced vector:
     * <ul>
     * <li>feature {@code 1}: sum of positive scores minus sum of negative
     *     scores over all {@link SentiWordNetAnnotation}s;</li>
     * <li>feature {@code 2}: the same difference over all
     *     {@link TwitterSentiCorpusAnnotation}s;</li>
     * <li>feature {@code getId() + 2}: set to {@code 1.0} for every
     *     {@link TwitterSentiCorpusAnnotation} encountered.</li>
     * </ul>
     *
     * <p>A sentiment annotation that is returned by the index sub-iterator of
     * any {@link NegationAnnotation} contributes with its positive and
     * negative scores exchanged.
     *
     * <p>NOTE(review): features 1 and 2 are written after the per-annotation
     * features, so an annotation whose {@code getId()} is 0 (or -1) would have
     * its presence feature overwritten by a score feature. Presumably ids
     * start at 1 - confirm against the annotator that assigns them.
     *
     * @param jcas the CAS whose annotation indexes are read
     * @return a newly created feature vector populated as described above
     */
    public SvmFeatureVector createFeatureVectorFromJCas(JCas jcas) {
        SvmFeatureVector fv = new SvmFeatureVector_Impl();

        // Pass 1: remember every sentiment annotation reachable through the
        // sub-iterator of some negation annotation.
        FSIterator<Annotation> negations = jcas.getAnnotationIndex(NegationAnnotation.type).iterator();
        HashSet<SentiWordNetAnnotation> swnUnderNegation = new HashSet<SentiWordNetAnnotation>();
        HashSet<TwitterSentiCorpusAnnotation> tscUnderNegation = new HashSet<TwitterSentiCorpusAnnotation>();
        while (negations.hasNext()) {
            NegationAnnotation scope = (NegationAnnotation) negations.next();

            for (FSIterator<Annotation> covered =
                    jcas.getAnnotationIndex(SentiWordNetAnnotation.type).subiterator(scope);
                    covered.hasNext();) {
                swnUnderNegation.add((SentiWordNetAnnotation) covered.next());
            }

            for (FSIterator<Annotation> covered =
                    jcas.getAnnotationIndex(TwitterSentiCorpusAnnotation.type).subiterator(scope);
                    covered.hasNext();) {
                tscUnderNegation.add((TwitterSentiCorpusAnnotation) covered.next());
            }
        }

        // Pass 2a: accumulate SentiWordNet scores, crediting each score to the
        // opposite polarity when the annotation was collected in pass 1.
        double swnPositiveTotal = 0.0;
        double swnNegativeTotal = 0.0;
        for (FSIterator<Annotation> all =
                jcas.getAnnotationIndex(SentiWordNetAnnotation.type).iterator();
                all.hasNext();) {
            SentiWordNetAnnotation word = (SentiWordNetAnnotation) all.next();
            double positive = word.getPositiveScore();
            double negative = word.getNegativeScore();
            if (swnUnderNegation.contains(word)) {
                swnPositiveTotal += negative;
                swnNegativeTotal += positive;
            } else {
                swnPositiveTotal += positive;
                swnNegativeTotal += negative;
            }
        }

        // Pass 2b: same accumulation for the Twitter sentiment corpus, and in
        // addition flag each annotation's own feature slot.
        double tscPositiveTotal = 0.0;
        double tscNegativeTotal = 0.0;
        for (FSIterator<Annotation> all =
                jcas.getAnnotationIndex(TwitterSentiCorpusAnnotation.type).iterator();
                all.hasNext();) {
            TwitterSentiCorpusAnnotation word = (TwitterSentiCorpusAnnotation) all.next();
            double positive = word.getPositiveScore();
            double negative = word.getNegativeScore();
            if (tscUnderNegation.contains(word)) {
                tscPositiveTotal += negative;
                tscNegativeTotal += positive;
            } else {
                tscPositiveTotal += positive;
                tscNegativeTotal += negative;
            }
            // Ids are shifted by 2 because slots 1 and 2 hold the score features.
            fv.setFeatureValue(word.getId() + 2, 1.0);
        }

        // Aggregate polarity features are written last.
        fv.setFeatureValue(1, swnPositiveTotal - swnNegativeTotal);
        fv.setFeatureValue(2, tscPositiveTotal - tscNegativeTotal);
        return fv;
    }
}