package info.ephyra.answerselection.filters; import info.ephyra.search.Result; import java.util.ArrayList; /** * <p>Splits sentences into subclauses in order to facilitate the detection of * redundant information.</p> * * <p>This class extends the class <code>Filter</code>.</p> * * @author Guido Sautter * @version 2008-02-15 */ public class SubclauseSplitterFilter extends Filter { /** * Splits sentences into individual subclauses in order to facilitate * subsequent filtering. The idea is that redundancy detection is easier for * shorter snippets than for longer ones. * * @param results array of <code>Result</code> objects * @return extended array of <code>Result</code> objects */ public Result[] apply(Result[] results) { // raw results returned by the searchers ArrayList<Result> rawResults = new ArrayList<Result>(); for (Result r : results) { if (r.getScore() != Float.NEGATIVE_INFINITY) { String sentence = r.getAnswer(); String[] sentences = sentence.split("(\\b(although|but|how|until|what|when|where|which|who|whom|why)\\b)"); if (sentences.length != 0) { r.setAnswer(sentences[0]); rawResults.add(r); for (int s = 1; s < sentences.length; s++) { Result newRes = new Result(sentences[s], r.getQuery(), r.getDocID(), r.getHitPos()); newRes.setScore(r.getScore()); rawResults.add(newRes); } } else rawResults.add(r); } } return rawResults.toArray(new Result[rawResults.size()]); } }