package info.ephyra.querygeneration;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>A data structure consisting of an expression describing a query
 * reformulation and a score that is used by the answer selection module to rank
 * results that follow from that reformulation.</p>
 *
 * <p>Before the <code>get()</code> method of this class can be called, a
 * <code>Matcher</code> needs to be created that applies the corresponding
 * question pattern to the question and its <code>matches()</code> method needs
 * to be called. This <code>Matcher</code> must be passed to the
 * <code>get()</code> method.</p>
 *
 * @author Nico Schlaefer
 * @version 2005-09-19
 */
public class QuestionReformulation {
    /** Matches a plain group identifier such as <code>[3]</code>. */
    private static final Pattern GROUP_ID = Pattern.compile("\\[(\\d+)\\]");

    /** Matches a combined group identifier such as <code>[1]&lt;[2]</code>. */
    private static final Pattern COMBINED_GROUP_ID =
            Pattern.compile("\\[(\\d+)\\]<\\[(\\d+)\\]");

    /**
     * <p>Expression that describes the reformulation.</p>
     *
     * <p>The expression can contain:</p>
     * <ul>
     * <li>Group identifiers that are replaced by groups captured by the
     * <code>Matcher</code> that applied the corresponding question pattern to
     * the question string.<br>
     * Format: <code>[group_id]</code></li>
     * <li>Combined group identifiers, that are resolved by placing the second
     * group in between each two words in the first group. One reformulation is
     * created for each possible position of the second group.<br>
     * Format: <code>[group_id1]&lt;[group_id2]</code></li>
     * <li>Other arbitrary strings that are not replaced.</li>
     * </ul>
     */
    private final String expr;

    /**
     * Score used by the answer selection module to rank results. A more
     * specific reformulation should be assigned a higher score.
     */
    private final float score;

    /**
     * Creates a <code>QuestionReformulation</code> object and sets the
     * expression and the score.
     *
     * @param expr expression describing the reformulation
     * @param score score of the reformulation
     */
    public QuestionReformulation(String expr, float score) {
        this.expr = expr;
        this.score = score;
    }

    /**
     * Returns the text captured by the given group, or the empty string if
     * the group did not take part in the match.
     *
     * @param matcher the <code>Matcher</code> that the group ID refers to
     * @param group number of the capturing group
     * @return captured text, never <code>null</code>
     */
    private static String captured(Matcher matcher, int group) {
        String text = matcher.group(group);
        return (text == null) ? "" : text;
    }

    /**
     * Replaces group identifiers of the format <code>[group_no]</code> by the
     * corresponding group captured by a <code>Matcher</code> that applied the
     * question pattern to the question string.
     *
     * <p>The query string is processed in a single pass, so text that has been
     * inserted from the question is never scanned for identifiers again, and
     * <code>$</code> or <code>\</code> in the question are inserted
     * literally.</p>
     *
     * @param queryString query string containing group identifiers
     * @param matcher the <code>Matcher</code> that group IDs refer to
     * @return query string without group identifiers
     */
    private String evalGroups(String queryString, Matcher matcher) {
        Matcher ids = GROUP_ID.matcher(queryString);
        // StringBuffer: the appendReplacement overload available on all JDKs
        StringBuffer result = new StringBuffer();

        while (ids.find()) {
            int group = Integer.parseInt(ids.group(1));
            // quote the replacement so that '$' and '\' are not interpreted
            ids.appendReplacement(result,
                    Matcher.quoteReplacement(captured(matcher, group)));
        }
        ids.appendTail(result);

        return result.toString();
    }

    /**
     * Resolves the expression into one or more query strings.
     *
     * <p>The first combined group identifier of the format
     * <code>[group_no1]&lt;[group_no2]</code> (and every further occurrence of
     * that same identifier) is replaced by the combinations generated by the
     * <code>combineStrings()</code> method, yielding one query string per
     * combination. All remaining text is passed through
     * <code>evalGroups()</code>. If there is no combined group identifier, a
     * single query string is returned.</p>
     *
     * @param queryString query string containing group identifiers
     * @param matcher the <code>Matcher</code> that group IDs refer to
     * @return query strings without group identifiers
     */
    private String[] evalCombinedGroups(String queryString, Matcher matcher) {
        Matcher ids = COMBINED_GROUP_ID.matcher(queryString);
        if (!ids.find()) {
            return new String[] {evalGroups(queryString, matcher)};
        }

        int group1 = Integer.parseInt(ids.group(1));
        int group2 = Integer.parseInt(ids.group(2));
        String[] combined = combineStrings(captured(matcher, group1),
                                           captured(matcher, group2));
        if (combined.length == 0) {
            return new String[0];
        }

        // Evaluate the text around the combined identifier separately, so that
        // the inserted combinations are not scanned for group identifiers.
        // Limit -1 keeps empty leading/trailing pieces.
        String[] literals = queryString.split(Pattern.quote(ids.group()), -1);
        for (int i = 0; i < literals.length; i++) {
            literals[i] = evalGroups(literals[i], matcher);
        }

        String[] queryStrings = new String[combined.length];
        for (int i = 0; i < combined.length; i++) {
            StringBuilder query = new StringBuilder(literals[0]);
            for (int k = 1; k < literals.length; k++) {
                query.append(combined[i]).append(literals[k]);
            }
            queryStrings[i] = query.toString();
        }
        return queryStrings;
    }

    /**
     * Combines two strings by inserting the second string in between each two
     * tokens of the first string. If the first string consists of
     * <code>n</code> tokens (separated by single blanks), then this method
     * returns an array of <code>n-1</code> strings; for fewer than two tokens
     * the array is empty.
     *
     * @param s1 first string
     * @param s2 second string
     * @return combination of the two strings
     */
    private String[] combineStrings(String s1, String s2) {
        String[] tokens = s1.split(" ");
        // a blank-only string splits into zero tokens; never go negative
        String[] combined = new String[Math.max(0, tokens.length - 1)];

        for (int i = 0; i < combined.length; i++) {
            StringBuilder sb = new StringBuilder();
            for (int j = 0; j < combined.length; j++) {
                sb.append(tokens[j]).append(' ');
                if (i == j) {
                    sb.append(s2).append(' ');
                }
            }
            sb.append(tokens[tokens.length - 1]);
            combined[i] = sb.toString();
        }

        return combined;
    }

    /**
     * Returns one or more reformulations of the original question, each
     * enclosed in quotation marks. Requires a <code>Matcher</code> that applied
     * the corresponding question pattern to the question string. The method
     * <code>matches()</code> must have been executed since group identifiers
     * that occur in the expression used by the reformulator refer to groups
     * that have been captured.
     *
     * <p>A group that did not take part in the match is treated as the empty
     * string. The result is empty if the expression contains a combined group
     * identifier whose first group consists of fewer than two tokens.</p>
     *
     * @param matcher the <code>Matcher</code> that group IDs refer to
     * @return reformulations of the original question
     */
    public String[] get(Matcher matcher) {
        String[] queryStrings = evalCombinedGroups(expr, matcher);

        // add quotation marks
        for (int i = 0; i < queryStrings.length; i++) {
            queryStrings[i] = "\"" + queryStrings[i] + "\"";
        }

        return queryStrings;
    }

    /**
     * Returns the score of the reformulation, used by the answer selection
     * module to rank results that follow from this reformulation.
     *
     * @return score of the reformulation
     */
    public float getScore() {
        return score;
    }
}