package com.packtpub.storm.trident.operator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class WordFrequencyFunction extends BaseFunction {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(WordFrequencyFunction.class);
public static final long DEFAULT_BASELINE = 10000;
private Map<String, Long> wordLikelihoods = new HashMap<String, Long>();
public WordFrequencyFunction() throws IOException {
File file = new File("src/main/resources/en.txt");
BufferedReader br = new BufferedReader(new FileReader(file));
String line;
int i = 0;
while ((line = br.readLine()) != null && i < 10000) {
String[] pair = line.split(" ");
long baseline = Long.parseLong(pair[1]);
LOG.debug("[" + pair[0] + "]=>[" + baseline + "]");
wordLikelihoods.put(pair[0].toLowerCase(), baseline);
i++;
}
br.close();
}
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
String word = (String) tuple.getValue(2);
Long baseline = this.getLikelihood(word);
List<Object> newTuple = new ArrayList<Object>();
newTuple.add(baseline);
collector.emit(newTuple);
}
public long getLikelihood(String word) {
Long baseline = this.wordLikelihoods.get(word);
if (baseline == null)
return DEFAULT_BASELINE;
else
return baseline;
}
}