package info.ephyra.answerselection.filters; import info.ephyra.questionanalysis.KeywordExtractor; import info.ephyra.search.Result; /** * <p>Filters results by the number of keywords.</p> * * <p>The score of each result is incremented by the number of keywords it * contains.</p> * * <p>A result is dropped if <code>m < Floor(Sqrt(k - 1)) + 1</code>, where * <code>k</code> is the number of keywords in the query string and * <code>m</code> is the number of keywords that also occur in the result.</p> * * <p>This class extends the class <code>Filter</code>.</p> * * @author Nico Schlaefer * @version 2005-09-14 */ public class NumberOfKeywordsFilter extends Filter { /** * Counts the number of words in the first array that occur in the second * array. Does not distinguish between lower and upper case. * * @param s1 string array 1 * @param s2 string array 2 * @return number of words in array 1 that occur in array 2 */ private int getNumberOfMatches(String[] s1, String[] s2) { int count = 0; for (String word1 : s1) for (String word2 : s2) if (word1.equalsIgnoreCase(word2)) { count++; break; // count each word in s1 only once } return count; } /** * Filters a single <code>Result</code> object. * * @param result result to filter * @return result with manipulated score or <code>null</code> if the result * is dropped */ public Result apply(Result result) { String[] kws = result.getQuery().getAnalyzedQuestion().getKeywords(); String[] wordsInResult = KeywordExtractor.tokenize(result.getAnswer()); int k = kws.length; int m = getNumberOfMatches(kws, wordsInResult); if (m >= Math.floor(Math.sqrt(k - 1)) + 1) { result.incScore(m); // manipulate score return result; // keep result } return null; // drop result } }