package info.ephyra.questionanalysis;

import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.indices.FunctionWords;
import info.ephyra.nlp.semantics.ontologies.WordNet;
import info.ephyra.util.Dictionary;
import info.ephyra.util.FileUtils;
import info.ephyra.util.HashDictionary;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.regex.PatternSyntaxException;

import javatools.PlingStemmer;

/**
 * The <code>QuestionInterpreter</code> parses a question and determines the
 * TARGET, the CONTEXT and the PROPERTY it asks for.
 *
 * <p>All state is held in static tables that are filled by
 * {@link #loadPatterns(String)}; repeated calls add to the tables rather than
 * replacing their content.</p>
 *
 * @author Nico Schlaefer
 * @version 2005-11-09
 */
public class QuestionInterpreter {
    /** The patterns that are applied to a question. */
    private static final ArrayList<QuestionPattern> questionPatterns =
        new ArrayList<QuestionPattern>();
    /** For each PROPERTY a dictionary of keywords. */
    private static final Hashtable<String, HashDictionary> keywords =
        new Hashtable<String, HashDictionary>();
    /** For each PROPERTY a template for a question asking for it. */
    private static final Hashtable<String, String> questionTemplates =
        new Hashtable<String, String>();
    /** For each PROPERTY a template for an answer string. */
    private static final Hashtable<String, String> answerTemplates =
        new Hashtable<String, String>();

    /**
     * Adds the keywords in a descriptor of a question pattern to the
     * dictionary for the respective PROPERTY. The dictionary is created
     * lazily, i.e. only once the first qualifying keyword is found.
     *
     * @param expr pattern descriptor
     * @param prop PROPERTY the question pattern belongs to
     */
    private static void addKeywords(String expr, String prop) {
        // tokenize expr, delimiters are meta-characters, '<', '>' and blank
        StringTokenizer st = new StringTokenizer(expr, "\\|*+?.^$(){}[]<> ");

        while (st.hasMoreTokens()) {
            String token = st.nextToken();

            // only tokens of length >= 3 that are not function words qualify
            if (token.length() <= 2 || FunctionWords.lookup(token)) continue;

            HashDictionary dict = keywords.get(prop);
            if (dict == null) {  // first keyword for this PROPERTY
                dict = new HashDictionary();
                keywords.put(prop, dict);
            }
            dict.add(token);
        }
    }

    /**
     * Loads the question patterns from a directory of PROPERTY files. Each
     * file contains a list of pattern descriptors. Their format is described
     * in the documentation of the class <code>QuestionPattern</code>.
     *
     * <p>The name of each file is used as the name of the PROPERTY. Lines
     * with a malformed pattern are reported through <code>MsgPrinter</code>
     * and skipped; they do not cause the method to fail. If an I/O error
     * occurs, entries that were read before the error remain loaded.</p>
     *
     * @param dir directory of the question patterns
     * @return true, iff the question patterns were loaded successfully
     */
    public static boolean loadPatterns(String dir) {
        File[] files = FileUtils.getFiles(dir);
        // Defensive: FileUtils.getFiles is not visible here; if it can yield
        // null (as File.listFiles does for a missing directory), report a
        // load failure instead of throwing a NullPointerException.
        if (files == null) return false;

        try {
            for (File file : files) loadPatternFile(file);
        } catch (IOException e) {
            // failure is signalled to the caller through the return value
            return false;
        }

        return true;
    }

    /**
     * Reads a single PROPERTY file and registers its question template,
     * answer template, question patterns and keywords. The reader is closed
     * even if reading fails.
     *
     * @param file PROPERTY file, its name is used as the PROPERTY
     * @throws IOException if the file could not be opened, read or closed
     */
    private static void loadPatternFile(File file) throws IOException {
        String prop = file.getName();
        BufferedReader in = new BufferedReader(new FileReader(file));

        try {
            String line;
            // readLine() returns null at the end of the file; ready() only
            // tells whether a read would block and is not an EOF test
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0 || line.startsWith("//"))
                    continue;  // skip blank lines and comments

                if (line.startsWith("QUESTION_TEMPLATE")) {
                    // add question template (text after the first whitespace)
                    String[] tokens = line.split("\\s+", 2);
                    if (tokens.length > 1)
                        questionTemplates.put(prop, tokens[1]);
                } else if (line.startsWith("ANSWER_TEMPLATE")) {
                    // add answer template (text after the first whitespace)
                    String[] tokens = line.split("\\s+", 2);
                    if (tokens.length > 1)
                        answerTemplates.put(prop, tokens[1]);
                } else {
                    try {
                        // add question pattern
                        questionPatterns.add(new QuestionPattern(line, prop));
                        // add keywords to the dictionary for prop
                        addKeywords(line, prop);
                    } catch (PatternSyntaxException pse) {
                        MsgPrinter.printErrorMsg("Problem loading pattern:\n"
                                + prop + " " + line);
                        MsgPrinter.printErrorMsg(pse.getMessage());
                    }
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Interprets a question by applying the question patterns and returns the
     * interpretations of minimal length.
     *
     * @param qn normalized question string
     * @param stemmed stemmed question string
     * @return array of interpretations or an empty array, if there was no
     *         matching question pattern
     */
    public static QuestionInterpretation[] interpret(String qn,
                                                     String stemmed) {
        // apply the question patterns, a null result means "no match"
        ArrayList<QuestionInterpretation> qis =
            new ArrayList<QuestionInterpretation>();
        for (QuestionPattern questionPattern : questionPatterns) {
            QuestionInterpretation qi = questionPattern.apply(qn, stemmed);
            if (qi != null) qis.add(qi);
        }

        // sort the interpretations using their natural ordering
        // (presumably by length, so that the shortest comes first - this
        // depends on QuestionInterpretation, which is not visible here)
        QuestionInterpretation[] sorted =
            qis.toArray(new QuestionInterpretation[qis.size()]);
        Arrays.sort(sorted);

        // only return interpretations that are as short as the first one
        ArrayList<QuestionInterpretation> minLength =
            new ArrayList<QuestionInterpretation>();
        for (QuestionInterpretation qi : sorted)
            if (qi.getLength() == sorted[0].getLength()) minLength.add(qi);

        return minLength.toArray(new QuestionInterpretation[minLength.size()]);
    }

    /**
     * Looks up a word in the dictionary for the given PROPERTY. The word is
     * tried as is, then in the form returned by the Pling stemmer and finally
     * as the verb lemma returned by WordNet.
     *
     * @param word the word to be looked up
     * @param prop the PROPERTY
     * @return true, iff <code>word</code> is in the dictionary for
     *         <code>prop</code>
     */
    public static boolean lookupKeyword(String word, String prop) {
        Dictionary dict = keywords.get(prop);
        if (dict == null) return false;  // no keywords for this PROPERTY

        if (dict.contains(word)) return true;

        String stem = PlingStemmer.stem(word);
        if (dict.contains(stem)) return true;

        String lemma = WordNet.getLemma(word, WordNet.VERB);
        return lemma != null && dict.contains(lemma);
    }

    /**
     * Returns a question string that asks for the specified property of the
     * target object or <code>null</code> if no question template is available
     * for the property. Each occurrence of <code>&lt;TO&gt;</code> in the
     * template is replaced by the target object.
     *
     * @param to target object
     * @param prop property
     * @return question string or <code>null</code>
     */
    public static String getQuestion(String to, String prop) {
        String question = questionTemplates.get(prop);
        if (question == null) return null;

        return question.replace("<TO>", to);
    }

    /**
     * Returns an answer string that expresses that the property object is an
     * instance of the specified property or <code>null</code> if no answer
     * template is available for the property. Each occurrence of
     * <code>&lt;PO&gt;</code> in the template is replaced by the property
     * object.
     *
     * @param po property object
     * @param prop property
     * @return answer string or <code>null</code>
     */
    public static String getAnswer(String po, String prop) {
        String answer = answerTemplates.get(prop);
        if (answer == null) return null;

        return answer.replace("<PO>", po);
    }
}