package info.ephyra.querygeneration;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>A <code>QuestionReformulator</code> can be applied to a question to obtain
* reformulations of the question that are likely to occur in text passages that
* answer the question.</p>
*
* <p>A question is expected to be of the format described by the
* <code>pattern</code> field. If the question does not match this pattern,
* no reformulations are created.</p>
*
* @author Nico Schlaefer
* @version 2005-11-09
*/
public class QuestionReformulator {
/**
* The pattern that identifies questions that can be processed by this
* reformulator.
*/
private Pattern pattern;
/**
* <code>QuestionReformulation</code> objects that are applied to questions
* that match the pattern.
*/
private ArrayList<QuestionReformulation> reforms =
new ArrayList<QuestionReformulation>();
/**
* <p>Creates a new <code>QuestionReformulator</code> from a file.</p>
*
* <p>The file must have the following format:</p>
*
* <p><code>QuestionPattern:<br>
* [regular expression]<br>
* <br>
* QuestionReformulations:<br>
* [expr 1]<br>
* [score 1]<br>
* ...<br>
* [expr n]<br>
* [score n]</code></p>
*
* <p><code>expr</code> is an expression describing a reformulation of the
* question. See the documentation of the class
* <code>QuestionReformulation</code> for further details on the format of
* such an expression. <code>score</code> is the score assigned to the
* reformulation and should be the higher the more specialized a
* reformulation is.</p>
*
* @param filename name of the file containing the reformulation rules
* @throws IOException if the reformulation rules could not be read
* successfully
*/
public QuestionReformulator(String filename) throws IOException {
File file = new File(filename);
BufferedReader in = new BufferedReader(new FileReader(file));
// read question pattern
in.readLine();
pattern = Pattern.compile(in.readLine(), Pattern.CASE_INSENSITIVE);
in.readLine();
// read question reformulations
in.readLine();
String expr;
float score;
while (in.ready()) {
expr = in.readLine();
score = Float.parseFloat(in.readLine());
reforms.add(new QuestionReformulation(expr, score));
}
in.close();
}
/**
* Creates reformulations of a question if it is of the format described by
* the <code>pattern</code> field and wraps them in <code>Query</code>
* objects.
*
* @param verbMod question string with modified verbs
* @return <code>Query</code objects created from question reformulations or
* <code>null</code>
*/
public Query[] apply(String verbMod) {
Matcher matcher = pattern.matcher(verbMod);
if (!matcher.matches()) return null; // question does not match pattern
ArrayList<Query> queries = new ArrayList<Query>();
String[] queryStrings;
Query query;
for (QuestionReformulation reform : reforms) { // apply reformulators
queryStrings = reform.get(matcher);
for (String queryString : queryStrings) {
// create query object and set score
query = new Query(queryString);
query.setScore(reform.getScore());
queries.add(query);
}
}
return queries.toArray(new Query[queries.size()]);
}
}