package info.ephyra.trec;
import info.ephyra.io.Logger;
import info.ephyra.io.MsgPrinter;
import info.ephyra.search.Result;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>Runs and evaluates Ephyra on the data from the TREC 8-11 QA tracks.</p>
*
* <p>This class extends <code>OpenEphyraCorpus</code>.</p>
*
* @author Nico Schlaefer
* @version 2007-07-11
*/
public class EphyraTREC8To11 extends OpenEphyraCorpus {
    /** Maximum number of factoid answers. */
    protected static final int FACTOID_MAX_ANSWERS = 5;
    /** Absolute threshold for factoid answer scores. */
    protected static final float FACTOID_ABS_THRESH = 0;
    /** Prefix of the command line argument that sets the log file. */
    private static final String LOG_ARG_PREFIX = "log=";
    /** Matches a run of digits; used to pick a number out of the question file name. */
    private static final Pattern DIGITS = Pattern.compile("\\d++");

    /** Log file for the results returned by Ephyra. */
    private static String logFile;
    /** Load answers from log file? */
    private static boolean loadLog = false;

    /** Question strings. */
    protected static String[] qss;
    /**
     * Corresponding regular expressions that describe correct answers. An
     * entry is <code>null</code> if no pattern is available for the question
     * at the same index.
     */
    protected static String[] regexs;

    /**
     * Loads questions and patterns from files and stores them in
     * <code>qss</code> and <code>regexs</code>. Both arrays have one entry per
     * question; only the first regular expression of each pattern is used.
     *
     * @param qFile name of the question file
     * @param pFile name of the pattern file
     */
    private static void loadTRECData(String qFile, String pFile) {
        // load questions from file
        TRECQuestion[] questions = TREC8To12Parser.loadQuestions(qFile);
        qss = new String[questions.length];
        for (int i = 0; i < questions.length; i++) {
            qss[i] = questions[i].getQuestionString();
        }

        // load patterns from file
        TRECPattern[] patterns = TREC8To12Parser.loadPatternsAligned(pFile);
        regexs = new String[questions.length];
        for (int i = 0; i < questions.length; i++) {
            if (i >= patterns.length || patterns[i] == null) {
                continue;  // no pattern for this question
            }
            // a pattern without any regular expression is treated like a
            // missing pattern instead of failing on the index access
            String[] patternRegexs = patterns[i].getRegexs();
            if (patternRegexs != null && patternRegexs.length > 0) {
                regexs[i] = patternRegexs[0];
            }
        }
    }

    /**
     * Initializes Ephyra, asks the questions and evaluates and logs the
     * answers.
     *
     * <p>For each question the answers are either loaded from the log file
     * (if <code>loadLog</code> is set and results are found) or obtained by
     * querying Ephyra. An answer is judged correct if the regular expression
     * for the question is found in it. Precision is the fraction of questions
     * with at least one correct answer, MRR is the mean of the reciprocal
     * ranks of the first correct answers. Both scores are 0 if there are no
     * questions.</p>
     */
    private static void runAndEval() {
        // initialize Ephyra
        EphyraTREC8To11 ephyra = new EphyraTREC8To11();

        float precision = 0;
        float mrr = 0;

        for (int i = 0; i < qss.length; i++) {
            MsgPrinter.printQuestion(qss[i]);

            Logger.enableLogging(false);

            // ask Ephyra or load answer from log file
            Result[] results = null;
            if (loadLog) {
                results = TREC13To16Parser.loadResults(qss[i], "FACTOID",
                        logFile);
            }
            if (results == null) {  // answer not loaded from log file
                Logger.enableLogging(true);
                Logger.logFactoidStart(qss[i]);

                results = ephyra.askFactoid(qss[i], FACTOID_MAX_ANSWERS,
                        FACTOID_ABS_THRESH);
            }

            // evaluate answers
            boolean[] correct = new boolean[results.length];
            int firstCorrect = 0;  // 1-based rank of first correct answer, 0 if none
            if (regexs[i] != null) {
                Pattern p = Pattern.compile(regexs[i]);
                for (int j = 0; j < results.length; j++) {
                    correct[j] = p.matcher(results[j].getAnswer()).find();
                    if (correct[j] && firstCorrect == 0) {
                        firstCorrect = j + 1;
                    }
                }
            }
            if (firstCorrect > 0) {
                precision++;
                mrr += 1f / firstCorrect;
            }

            Logger.logResultsJudged(results, correct);
            Logger.logFactoidEnd();
        }

        // average over all questions; skipped for an empty question set so
        // that the scores stay 0 instead of becoming NaN (0 / 0)
        if (qss.length > 0) {
            precision /= qss.length;
            mrr /= qss.length;
        }

        Logger.logScores(precision, mrr);
    }

    /**
     * Formats a non-negative number with at least two digits.
     *
     * @param value number to format
     * @return the number as a string, with a leading "0" if it is below 10
     */
    private static String twoDigits(int value) {
        return (value < 10 ? "0" : "") + value;
    }

    /**
     * Generates a log file name of the form
     * <code>log/TREC&lt;n&gt;_&lt;yyyyMMdd&gt;</code>, where <code>n</code>
     * is the first run of digits in the name of the question file (empty if
     * there is none) and the date is the current date.
     *
     * @param qFile name of the question file
     * @return generated log file name
     */
    private static String generateLogFileName(String qFile) {
        String n = "";
        Matcher m = DIGITS.matcher(qFile);
        if (m.find()) {
            n = m.group(0);
        }

        Calendar c = new GregorianCalendar();
        // Calendar.MONTH is zero-based
        String date = c.get(Calendar.YEAR)
                + twoDigits(c.get(Calendar.MONTH) + 1)
                + twoDigits(c.get(Calendar.DAY_OF_MONTH));

        return "log/TREC" + n + "_" + date;
    }

    /**
     * Runs and evaluates Ephyra on TREC data.
     *
     * @param args argument 1: name of the question file<br>
     *             argument 2: name of the pattern file<br>
     *             [argument 3: log=logfile (if not set an unambiguous file name
     *                          is generated automatically)]<br>
     *             [argument 4: load_log (answers are loaded from the log file
     *                          instead of querying Ephyra)]<br>
     *             The optional arguments may be given in any order;
     *             unrecognized arguments are ignored.
     */
    public static void main(String[] args) {
        // enable output of status and error messages
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.enableErrorMsgs(true);

        if (args.length < 2) {
            MsgPrinter.printUsage("java EphyraTREC8To11 question_file " +
                                  "pattern_file [log=logfile] [load_log]");
            System.exit(1);
        }

        // load questions and patterns
        loadTRECData(args[0], args[1]);

        // parse optional arguments
        for (int i = 2; i < args.length; i++) {
            if (args[i].startsWith(LOG_ARG_PREFIX)) {
                // set log file
                logFile = args[i].substring(LOG_ARG_PREFIX.length());
            } else if (args[i].equals("load_log")) {
                // answers are loaded from log file
                loadLog = true;
            }
        }

        // if log file not set, generate unambiguous name
        if (logFile == null) {
            logFile = generateLogFileName(args[0]);
        }
        Logger.setLogfile(logFile);

        // ask Ephyra the questions and evaluate the answers
        runAndEval();
    }
}