package info.ephyra.answerselection.filters;
import info.ephyra.nlp.NETagger;
import info.ephyra.search.Result;
import info.ephyra.util.StringUtils;
import java.util.ArrayList;
/**
* <p>The <code>FactoidSubsetFilter</code> checks a set of factoid answers for
* subset relations. If a factoid answer is a subset of another factoid answer
* (i.e. its tokens are a subset of the tokens of another answer), then the
* former is dropped and its score is tranferred to the latter.</p>
*
* <p>The filter is only applied if the longer answer is a named entity that has
* been extracted with a pattern-based or list-based tagger and thus is properly
* chunked. This is to avoid that malformatted answers which contain additional
* tokens (such as "1879 and") are preferred over properly chunked ones (e.g.
* "1879").</p>
*
* <p>This class extends the class <code>Filter</code>.</p>
*
* @author Nico Schlaefer
* @version 2007-03-05
*/
public class FactoidSubsetFilter extends Filter {
/**
* <p>Drops results that are subsets of other results and transfers their
* scores to the remaining results.</p>
*
* @param results array of <code>Result</code> objects
* @return array of <code>Result</code> objects that are not subsets
*/
public Result[] apply(Result[] results) {
// sort results by their scores in ascending order
results = (new ReverseScoreSorterFilter()).apply(results);
// sort results by their lengths in ascending order (stable)
results = (new ResultLengthSorterFilter()).apply(results);
// normalize answer strings
String[] norms = new String[results.length];
for (int i = 0; i < results.length; i++)
if (results[i].getScore() != Float.POSITIVE_INFINITY &&
results[i].getScore() != Float.NEGATIVE_INFINITY)
norms[i] = StringUtils.normalize(results[i].getAnswer());
// check for subset relations, aggregate answers
for (int i = 0; i < results.length - 1; i++) {
if (results[i].getScore() != Float.POSITIVE_INFINITY &&
results[i].getScore() != Float.NEGATIVE_INFINITY)
for (int j = results.length - 1; j > i; j--)
if (results[j].getScore() != Float.POSITIVE_INFINITY &&
results[j].getScore() != Float.NEGATIVE_INFINITY &&
results[j].isNamedEntity() &&
!NETagger.allModelType(results[j].getNeTypes()) &&
StringUtils.isSubsetKeywords(norms[i], norms[j])) {
// longer answer is a NE not extracted with a
// model-based tagger
results[j].incScore(results[i].getScore());
results[i] = null;
break;
}
}
// get remaining results
ArrayList<Result> remaining = new ArrayList<Result>();
for (Result result : results)
if (result != null) remaining.add(result);
return remaining.toArray(new Result[remaining.size()]);
}
}