package info.ephyra.querygeneration.generators;

import info.ephyra.answerselection.filters.AnswerPatternFilter;
import info.ephyra.answerselection.filters.AnswerTypeFilter;
import info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter;
import info.ephyra.nlp.indices.FunctionWords;
import info.ephyra.nlp.semantics.Predicate;
import info.ephyra.querygeneration.Query;
import info.ephyra.questionanalysis.AnalyzedQuestion;
import info.ephyra.questionanalysis.Term;
import info.ephyra.util.StringUtils;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * <p>The <code>PredicateG</code> query generator creates queries from the
 * predicates in the question string.</p>
 *
 * <p>This class extends the class <code>QueryGenerator</code>.</p>
 *
 * @author Nico Schlaefer
 * @version 2007-07-11
 */
public class PredicateG extends QueryGenerator {
    /** Score assigned to queries created from predicates. */
    private static final float SCORE = 2;

    /** Words that should not be part of a query string. */
    private static final String IGNORE = "(names?|give|tell|list)";

    /**
     * Case-insensitive matcher for {@link #IGNORE}, compiled once instead of
     * once per predicate argument.
     */
    private static final Pattern IGNORE_PATTERN =
        Pattern.compile("(?i)" + IGNORE);

    /** Matches phrases that contain whitespace and therefore need quoting. */
    private static final Pattern COMPOUND_PATTERN =
        Pattern.compile(".*?\\s.*+");

    /** Answer extraction techniques for this query type. */
    private static final String[] EXTRACTION_TECHNIQUES = {
        AnswerTypeFilter.ID,
        AnswerPatternFilter.ID,
        FactoidsFromPredicatesFilter.ID
    };

    /**
     * Forms a query string from the verbs and arguments of the predicates,
     * followed by the context keywords.
     *
     * <p>Each predicate's verb/argument strings are split on tab characters.
     * A part is kept only if it is not one of the {@link #IGNORE} words, is
     * not a function word and its normalized form has not been seen before.
     * Quotation marks inside a kept part are dropped, and parts containing
     * whitespace are wrapped in quotation marks so that they are searched as
     * phrases.</p>
     *
     * <p>Term phrases, term expansions and keyword expansions are disabled
     * (keyword expansion is not supported by Web search engines), so
     * <code>terms</code> is currently not used.</p>
     *
     * @param predicates predicates in the question
     * @param terms terms in the question (currently unused)
     * @param kws keywords in the question
     * @return query string
     */
    public String getQueryString(Predicate[] predicates, Term[] terms,
            String[] kws) {
        List<String> phraseL = new ArrayList<String>();
        // normalized forms already added, used to drop duplicates
        Set<String> normSet = new HashSet<String>();

        // get predicate verbs and arguments
        for (Predicate predicate : predicates) {
            for (String verbArg : predicate.getVerbArgs()) {
                for (String part : verbArg.split("\t")) {
                    // no words in IGNORE
                    if (IGNORE_PATTERN.matcher(part).matches()) {
                        continue;
                    }
                    // no function words
                    if (FunctionWords.lookup(part)) {
                        continue;
                    }
                    // no duplicates; the normalized form is only recorded
                    // for parts that passed the two filters above
                    if (!normSet.add(StringUtils.normalize(part))) {
                        continue;
                    }

                    phraseL.add(quoteIfCompound(part));
                }
            }
        }

        // build query string
        String[] phrases = phraseL.toArray(new String[phraseL.size()]);
        String queryString = StringUtils.concatWithSpaces(phrases);

        // include context keywords in the query string
        // NOTE(review): the check compares the whole query string with the
        // keyword, so a keyword that already occurs inside the query string
        // is presumably still appended - confirm that this is intended.
        for (String kw : kws) {
            if (!StringUtils.equalsCommonNorm(queryString, kw)) {
                queryString += " " + kw;
            }
        }

        return queryString;
    }

    /**
     * Drops quotation marks from a phrase and wraps it in quotation marks if
     * it contains whitespace.
     *
     * @param part verb or argument of a predicate
     * @return phrase that can be added to a query string
     */
    private static String quoteIfCompound(String part) {
        // drop quotation marks
        String noQuotes = part.replace("\"", "");

        // add quotation marks for compound phrases
        if (COMPOUND_PATTERN.matcher(noQuotes).matches()) {
            return "\"" + noQuotes + "\"";
        }
        return noQuotes;
    }

    /**
     * Generates queries from predicate-argument structures extracted from the
     * question string.
     *
     * @param aq analyzed question
     * @return <code>Query</code> objects; an empty array if the question has
     *         no predicates, otherwise an array with a single query
     */
    public Query[] generateQueries(AnalyzedQuestion aq) {
        // only generate a query if predicates could be extracted
        Predicate[] ps = aq.getPredicates();
        if (ps.length == 0) {
            return new Query[0];
        }

        // create query string
        Term[] terms = aq.getTerms();
        String[] kws = aq.getKeywords();
        String queryString = getQueryString(ps, terms, kws);

        // create query and set the answer extraction techniques
        Query query = new Query(queryString, aq, SCORE);
        query.setExtractionTechniques(EXTRACTION_TECHNIQUES);

        return new Query[] {query};
    }
}