package info.ephyra.search.searchers; import info.ephyra.querygeneration.Query; import info.ephyra.search.Result; import info.ephyra.search.Search; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * <p>A <code>KnowledgeAnnotator</code> searches a (semi)structured knowledge * source. It provides a specialized solution to certain classes of questions, * described by a set of question patterns. Only questions that match at least * one of the patterns in the field <code>qPatterns</code> are supported by a * <code>KnowledgeAnnotator</code>.</p> * * <p>It runs as a separate thread, so several queries can be performed in * parallel.</p> * * <p>This class extends the class <code>Searcher</code> and is abstract.</p> * * @author Nico Schlaefer * @version 2005-09-28 */ public abstract class KnowledgeAnnotator extends Searcher { /** Name of the knowledge annotator. */ protected String name; /** * A question that matches at least one of these patterns can be handled by * the <code>KnowledgeAnnotator</code>. */ protected ArrayList<Pattern> qPatterns = new ArrayList<Pattern>(); /** * Strings identifying the relevant content of a question by referring to * the groups in the corresponding question patterns. */ protected ArrayList<String> qContents = new ArrayList<String>(); /** Index of the matching pattern. */ protected int index; /** The <code>Matcher</code> that matched the pattern with the question. */ protected Matcher matcher; /** * Protected constructor used by the <code>getCopy()</code> method. * * @param name name of the <code>KnowledgeAnnotator</code> * @param qPatterns question patterns * @param qContents descriptors of the relevant content of a question */ protected KnowledgeAnnotator(String name, ArrayList<Pattern> qPatterns, ArrayList<String> qContents) { this.name = name; this.qPatterns = qPatterns; this.qContents = qContents; } /** * <p>Creates a <code>KnowledgeAnnotator</code> and reads the question * patterns and descriptors of the relevant content of a question from a * file.</p> * * <p>The file must have the following format:</p> * * <p><code>KnowledgeAnnotator::<br> * [name of the knowledge annotator]<br> * <br> * QuestionPatterns:<br> * [regular expression 1]<br> * [relevant content 1]<br> * ...<br> * [regular expression n]<br> * [relevant content n]</code></p> * * <p>The relevant content of a question is described by a string that may * contain group identifiers of the format <code>[group_no]</code> that are * replaced by the capturing groups that occur in the corresponding question * pattern.</p> * * @param filename file containing the question patterns and descriptors of * the relevant content of a question */ public KnowledgeAnnotator(String filename) throws IOException { File file = new File(filename); BufferedReader in = new BufferedReader(new FileReader(file)); // read name of the knowledge in.readLine(); name = in.readLine(); in.readLine(); // read answer patterns in.readLine(); while (in.ready()) { qPatterns.add(Pattern.compile(in.readLine())); qContents.add(in.readLine()); } in.close(); } /** * Tests whether the knowledge annotator is appropriate for a question by * applying the patterns in the field <code>qPatterns</code>. * * @param query <code>Query</code> object * @return true, iff the question matches at least one of the patterns in * <code>qPatterns</code> */ protected boolean matches(Query query) { String question = query.getAnalyzedQuestion().getQuestion(); for (int i = 0; i < qPatterns.size(); i++) { Matcher m = qPatterns.get(i).matcher(question); if (m.matches()) { this.query = query; // save the Query object index = i; // save the index of the pattern matcher = m; // save the matcher return true; } } return false; } /** * Extracts the relevant content of a question by resolving the group * identifiers of the format <code>[group_no]</code> in the content string * that corresponds to the matching pattern. * * @return relevant content of the question */ protected String getContent() { String content = qContents.get(index); Pattern p = Pattern.compile("\\[(\\d*)\\]"); Matcher m = p.matcher(content); // replace all group IDs by the corresponding parts of the question while (m.find()) { int group = Integer.parseInt(m.group(1)); content = content.replace(m.group(), matcher.group(group)); } return content; } /** * Creates an array of a single <code>Result</code> object form an answer * string and a document ID. * * @param answer answer string * @param docID document ID * @return array of a single <code>Result</code> object */ protected Result[] getResult(String answer, String docID) { Result[] results = new Result[1]; results[0] = new Result(answer, query, docID); // result is always returned by the QA engine results[0].setScore(Float.POSITIVE_INFINITY); return results; } /** * Returns the name of the knowledgeAnnotator. * * @return name of the knowledge annotator */ public String getKAName() { return name; } /** * <p>Returns a new instance of the <code>KnowledgeAnnotator</code>. A new * instance is created for each query.</p> * * <p>It does not necessarily return an exact copy of the current * instance.</p> * * @return new instance of the <code>KnowledgeAnnotator</code> */ public abstract KnowledgeAnnotator getCopy(); /** * <p>Sets the query and starts the thread if the knowledge annotator is * appropriate for the user question.</p> * * <p>This method should be used instead of the inherited * <code>start()</code> method without arguments.</p> * * @param query query object * @return true, iff the knowledge annotator is appropriate and the thread * was started */ public boolean start(Query query) { KnowledgeAnnotator ka = getCopy(); if (ka.matches(query)) { // wait until there are less than MAX_PENDING pending queries Search.waitForPending(); ka.start(); // one more pending query Search.incPending(); return true; } return false; } }