package edu.uncc.cs.watsonsim.scorers;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;
import edu.uncc.cs.watsonsim.Answer;
import edu.uncc.cs.watsonsim.Environment;
import edu.uncc.cs.watsonsim.Phrase;
import edu.uncc.cs.watsonsim.Question;
import edu.uncc.cs.watsonsim.nlp.ApproxStringIntMap;
import edu.uncc.cs.watsonsim.nlp.StringStack;
/**
 * Scores an answer by summing a per-word value looked up for each of the
 * answer's tokens.
 *
 * <p>The per-word values are read once from the {@code entropy} database
 * table and kept in a static, class-wide dictionary shared by every
 * instance of this scorer.
 */
public class Entropy extends AnswerScorer {
    /**
     * Fixed-point scale factor ({@code 2<<20}, i.e. 2^21 = 2097152) used to
     * store each double {@code p} as an int in {@link #dict}; lookups divide
     * by the same factor to recover an approximation of the original value.
     */
    private static final double mult = 2<<20;

    // This is a custom approach for about a 10-fold reduction in memory
    private static final ApproxStringIntMap dict = new ApproxStringIntMap(new StringStack());

    /**
     * Creates the scorer, loading the shared dictionary on first use.
     *
     * @param env environment whose database holds the {@code entropy} table
     */
    public Entropy(Environment env) {
        load(env);
    }

    /**
     * Fills the shared dictionary from the {@code entropy} table if it is
     * currently empty. The method is static and synchronized, so at most one
     * thread runs it at a time.
     *
     * <p>If the query fails, the stack trace is printed and whatever was
     * inserted so far is kept. Because loading is only attempted while the
     * dictionary is empty, a partially loaded dictionary is not reloaded.
     *
     * @param env environment providing the database handle
     */
    private static synchronized void load(Environment env) {
        if (!dict.isEmpty()) {
            return;
        }
        int collisions = 0;
        // try-with-resources releases the result set on both the normal and
        // the failure path. The statement returned by prep() is left alone:
        // NOTE(review): it looks like it may be cached/owned by env.db --
        // confirm before closing it here as well.
        try (ResultSet rs = env.db.prep("SELECT word, p FROM entropy;").executeQuery()) {
            while (rs.next()) {
                String word = rs.getString(1);
                // Count rows whose key the map already reports as present
                // before this insertion.
                if (dict.containsKey(word)) {
                    collisions++;
                }
                // This mult is to put enough of the double's precision in
                // the int. p is logarithmic so overflow is not a problem.
                dict.put(word, (int)(rs.getDouble(2)*mult));
            }
        } catch (SQLException e) {
            // Leave the table blank and give 0's
            e.printStackTrace();
        }
        System.out.println("Loaded " + dict.size() + " words' entropy "
                + "(" + collisions + " collisions)");
    }

    /**
     * Sums the stored value of every given word, scaled back down by
     * {@link #mult}.
     *
     * @param targets words to look up; an empty iterable yields {@code 0}
     * @return the sum of {@code dict.get(word) / mult} over all words
     */
    protected double entropy(Iterable<String> targets) {
        double ent = 0;
        for (String target: targets) {
            // What get() returns for a word that was never loaded is decided
            // by ApproxStringIntMap, which is not visible here.
            ent += dict.get(target) / mult;
        }
        return ent;
    }

    /**
     * Scores an answer from its tokens alone; the question is not consulted.
     *
     * @param q the question being answered (unused)
     * @param a the candidate answer whose {@code Phrase.tokens} memo is scored
     * @return the summed per-token value computed by {@link #entropy}
     */
    @Override
    public double scoreAnswer(Question q, Answer a) {
        return entropy(a.memo(Phrase.tokens));
    }
}