package info.ephyra.answerselection.filters;
import info.ephyra.search.Result;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>Trims answer candidates for definitional questions to improve the
* precision by chopping off leading introductions of indirect speech.</p>
*
* <p>This class extends the class <code>Filter</code>.</p>
*
* @author Guido Sautter
* @version 2008-02-10
*/
public class CutStatementProviderFilter extends Filter {
    /**
     * Matches an answer that starts with the introduction of indirect speech:
     * two or more words consisting of an upper case letter followed by lower
     * case letters, then 'said', optionally one further word, and 'that'.
     * Group 1 captures this introduction including the whitespace after
     * 'that'. At least one more character has to follow the introduction.
     * Note that '.' does not match line terminators here, so an answer that
     * contains a line break is not matched.
     */
    private static final Pattern PROVIDER_AT_START = Pattern.compile("(([A-Z][a-z]++\\s*+){2,}+said\\s++([A-Za-z]+\\s+)?that\\s++){1}+.++");

    // Cutting a trailing statement provider (', said XYZ') is currently not
    // implemented; only leading introductions are removed.

    /**
     * Cuts a leading introductory part of information given in indirect
     * speech, in order to not waste result length with this part. It would
     * cut, for instance, 'John Smith said that' from 'John Smith said that
     * ... some useful information ...'. The remaining answer is trimmed.
     * Answers that do not start with such an introduction are left unchanged.
     *
     * @param result a <code>Result</code> object, may be <code>null</code>
     * @return the same <code>Result</code> object (with its answer possibly
     *         shortened), or <code>null</code> if <code>result</code> is
     *         <code>null</code>
     */
    public Result apply(Result result) {
        if (result == null) {
            return null;
        }

        String text = result.getAnswer();
        if (text == null) {
            // nothing to cut, pass the result on untouched
            return result;
        }

        Matcher matcher = PROVIDER_AT_START.matcher(text);
        if (matcher.matches()) {
            // matches() anchors the pattern at index 0, so the end offset of
            // group 1 is exactly the length of the introduction to drop
            result.setAnswer(text.substring(matcher.end(1)).trim());
        }
        return result;
    }

    /**
     * Applies {@link #apply(Result)} to every element of the given array,
     * cutting leading introductory parts of information given in indirect
     * speech. The array is modified in place; its length does not change.
     *
     * @param results array of <code>Result</code> objects, may be
     *                <code>null</code>
     * @return the same array of <code>Result</code> objects, or
     *         <code>null</code> if <code>results</code> is <code>null</code>
     */
    public Result[] apply(Result[] results) {
        if (results == null) {
            return null;
        }

        for (int r = 0; r < results.length; r++) {
            results[r] = this.apply(results[r]);
        }
        return results;
    }
}