package edu.uncc.cs.watsonsim.scorers;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import edu.uncc.cs.watsonsim.Answer;
import edu.uncc.cs.watsonsim.Passage;
import edu.uncc.cs.watsonsim.Phrase;
import edu.uncc.cs.watsonsim.Question;
/**
 * Passage scorer based on how far apart question words occur in a passage.
 *
 * <p>{@link #scoreQuestion} records the words of the current question in
 * {@link #q_words}; {@link #scorePassage} then walks the passage's words and
 * accumulates the logarithm of the gap preceding each question word.
 *
 * <p>Matching is an exact, case-sensitive string comparison of tokens obtained
 * by splitting on runs of non-word characters ({@code \W+}).
 *
 * <p>Note: the question words are kept in a mutable instance field that is
 * reset on every {@code scoreQuestion} call, so one instance should not score
 * two questions concurrently.
 */
public class WordProximity extends PassageScorer {
    /** Token separator: one or more non-word characters. Compiled once and reused. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    /** Words of the question currently being scored; rebuilt by {@link #scoreQuestion}. */
    Set<String> q_words = new HashSet<String>();

    /**
     * Rebuilds the question-word set from {@code q.text}, then delegates to the
     * superclass implementation.
     *
     * @param q       the question whose text supplies the words to look for
     * @param answers the candidate answers, passed through to the superclass
     */
    @Override
    public void scoreQuestion(Question q, List<Answer> answers) {
        q_words.clear();
        q_words.addAll(Arrays.asList(NON_WORD.split(q.text)));
        // split() yields a leading "" when the text starts with a non-word
        // character (and [""] for empty text); that is not a real word.
        q_words.remove("");
        // NOTE(review): presumably the superclass calls scorePassage for each
        // passage of each answer -- confirm in PassageScorer.
        super.scoreQuestion(q, answers);
    }

    /**
     * Sums {@code log(gap)} over every question word found in the passage.
     *
     * <p>The gap starts at 1 and grows by one for each passage word that is not
     * a question word; when a question word is hit, {@code log(gap)} is added to
     * the total and the gap resets to 1. Adjacent question words therefore add
     * {@code log(1) == 0}, and words after the last question word add nothing.
     *
     * <p>The parameters {@code q} and {@code a} are not used here; the question
     * words come from the set filled in by {@link #scoreQuestion}.
     *
     * @param q the question phrase (unused)
     * @param a the answer the passage belongs to (unused)
     * @param p the passage whose text is scanned
     * @return the sum of the log gaps (a total, not a mean); 0 when no question
     *         word occurs in the passage
     */
    @Override
    public double scorePassage(Phrase q, Answer a, Passage p) {
        double gap = 1;
        double summedLogGap = 0;
        for (String word : NON_WORD.split(p.text)) {
            if (word.isEmpty()) {
                // Leading empty token produced by split() when the passage
                // starts with punctuation/whitespace: not a word, so it must
                // neither widen the gap nor count as a match.
                continue;
            }
            if (q_words.contains(word)) {
                summedLogGap += Math.log(gap);
                gap = 1;
            } else {
                gap++;
            }
        }
        // The result is on a log(interval) scale and is not normalised by the
        // number of matches; whether that matters downstream is an open question.
        return summedLogGap;
    }
}