/**
*
*/
package com.maalaang.omtwitter.text;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import com.maalaang.omtwitter.model.OMTweet;
/**
 * Tweet filter that compares the relevance score of each tweet with a
 * threshold derived from a sliding window of recently seen scores.
 *
 * <p>The score of a tweet is the sum of the word relevance scores of its
 * tokens divided by the total number of tokens. The threshold is the window
 * average (or {@code startWindowScore} while the window is still warming up)
 * multiplied by {@code relevanceFactor}.
 *
 * <p>Usage: call {@link #initialize()} once, then {@link #next} for each
 * tweet followed by {@link #isFilteredOut()}, and finally {@link #close()}.
 * The instance may be re-initialized after it has been closed.
 *
 * @author Sangwon Park
 */
public class FilterDomainRelevance implements TweetFilter {

    /** Word relevance scores keyed by word; hashtags are looked up without their first character. */
    private final Map<String,Double> wrsMap;
    /** Normal tokens contained in this set never contribute to the score. */
    private final Set<String> stopwords;
    /** Multiplier applied to the base score to obtain the threshold. */
    private final double relevanceFactor;
    /** Maximum number of scores kept in the sliding window. */
    private final int windowSize;
    /** Base score used until {@code windowSize} tweets have been processed. */
    private final double startWindowScore;
    /** When true the comparison is inverted: tweets scoring above the threshold are filtered. */
    private final boolean irrelevance;

    /** Scores currently inside the window, oldest first; null while not initialized. */
    private LinkedList<Double> windowQueue = null;
    /** Running sum of the scores held in {@link #windowQueue}. */
    private double windowScoreSum = 0.0;
    /** Decision made for the tweet most recently passed to {@link #next}. */
    private boolean filtered = false;
    /** Number of tweets processed during warm-up. */
    private int processCnt = 0;
    /** Set once enough tweets were seen to trust the window average. */
    private boolean useWindowScore = false;

    /**
     * Creates a filter that drops tweets scoring below the threshold.
     *
     * @param wrsMap word relevance scores keyed by word
     * @param stopwords words excluded from scoring
     * @param relevanceFactor multiplier applied to the base score
     * @param windowSize number of recent scores averaged for the threshold
     * @param startWindowScore base score used during warm-up
     */
    public FilterDomainRelevance(Map<String,Double> wrsMap, Set<String> stopwords, double relevanceFactor, int windowSize, double startWindowScore) {
        this(wrsMap, stopwords, relevanceFactor, windowSize, startWindowScore, false);
    }

    /**
     * Creates a filter with an explicit comparison direction.
     *
     * @param wrsMap word relevance scores keyed by word
     * @param stopwords words excluded from scoring
     * @param relevanceFactor multiplier applied to the base score
     * @param windowSize number of recent scores averaged for the threshold
     * @param startWindowScore base score used during warm-up
     * @param irrelevance if false, tweets scoring below the threshold are
     *        filtered out; if true, tweets scoring above it are filtered out
     */
    public FilterDomainRelevance(Map<String,Double> wrsMap, Set<String> stopwords, double relevanceFactor, int windowSize, double startWindowScore, boolean irrelevance) {
        this.wrsMap = wrsMap;
        this.stopwords = stopwords;
        this.relevanceFactor = relevanceFactor;
        this.windowSize = windowSize;
        this.startWindowScore = startWindowScore;
        this.irrelevance = irrelevance;
    }

    /**
     * Prepares the filter for a new run. All rolling state is reset so that
     * an instance reused after {@link #close()} does not carry over the
     * window sum, warm-up counter or last decision of the previous run.
     */
    public void initialize() {
        windowQueue = new LinkedList<Double>();
        windowScoreSum = 0.0;
        processCnt = 0;
        useWindowScore = false;
        filtered = false;
    }

    /**
     * Scores the given tweet, decides whether it is filtered out and then
     * adds its score to the sliding window.
     *
     * @param tweet the tweet being processed (not read by this filter)
     * @param tokenList tokens of the tweet; null is scored like an empty tweet
     * @throws IllegalStateException if {@link #initialize()} has not been
     *         called, or {@link #close()} was called since
     */
    public void next(OMTweet tweet, OMTweetToken[] tokenList) {
        if (windowQueue == null) {
            throw new IllegalStateException("initialize() must be called before next()");
        }

        double rs = relevanceScore(tokenList);

        // The threshold is computed before the current score enters the
        // window, so a tweet is always judged against earlier tweets only.
        double threshold = useWindowScore && windowSize > 0 ? windowScoreSum / (double) windowSize : startWindowScore;
        threshold *= relevanceFactor;

        if (irrelevance) {
            filtered = rs > threshold;
        } else {
            filtered = rs < threshold;
        }

        // Slide the window: append the new score, evict the oldest if full.
        windowScoreSum += rs;
        windowQueue.addLast(rs);
        if (windowQueue.size() > windowSize) {
            windowScoreSum -= windowQueue.remove();
        }

        // Switch from the start score to the window average once
        // windowSize tweets have been processed.
        if (!useWindowScore && ++processCnt >= windowSize) {
            useWindowScore = true;
        }
    }

    /**
     * @return the decision made for the tweet most recently passed to
     *         {@link #next}; false if no tweet has been processed yet
     */
    public boolean isFilteredOut() {
        return filtered;
    }

    /**
     * Releases the sliding window. Safe to call more than once or without a
     * preceding {@link #initialize()}.
     */
    public void close() {
        if (windowQueue != null) {
            windowQueue.clear();
            windowQueue = null;
        }
    }

    /**
     * Computes the relevance score of a tokenized tweet.
     *
     * @param tokenList tokens of the tweet, may be null
     * @return sum of the word relevance scores of hashtag tokens and
     *         non-stopword normal tokens, divided by the total number of
     *         tokens; 0.0 for a null or empty token list
     */
    private double relevanceScore(OMTweetToken[] tokenList) {
        if (tokenList == null || tokenList.length == 0) {
            return 0.0;
        }

        double sum = 0.0;
        for (OMTweetToken tok : tokenList) {
            Double wrs = null;
            switch (tok.getType()) {
            case OMTweetToken.TOKEN_TYPE_HASHTAG: {
                // The first character is dropped before the lookup
                // (presumably the leading '#'; confirm against the tokenizer).
                // An empty text has nothing to drop and is skipped.
                String text = tok.getText();
                if (text.length() > 0) {
                    wrs = wrsMap.get(text.substring(1));
                }
                break;
            }
            case OMTweetToken.TOKEN_TYPE_NORMAL:
                if (!stopwords.contains(tok.getText())) {
                    wrs = wrsMap.get(tok.getText());
                }
                break;
            default:
                // Other token types do not contribute to the sum, but they
                // still count in the denominator below.
                break;
            }
            if (wrs != null) {
                sum += wrs;
            }
        }
        return sum / (double) tokenList.length;
    }
}