package info.ephyra.querygeneration.generators;

import info.ephyra.answerselection.filters.AnswerPatternFilter;
import info.ephyra.answerselection.filters.AnswerTypeFilter;
import info.ephyra.answerselection.filters.FactoidsFromPredicatesFilter;
import info.ephyra.nlp.semantics.Predicate;
import info.ephyra.querygeneration.Query;
import info.ephyra.questionanalysis.AnalyzedQuestion;
import info.ephyra.questionanalysis.Term;
import info.ephyra.util.StringUtils;

import java.util.ArrayList;
import java.util.HashSet;

/**
 * <p>The <code>BagOfWordsG</code> query generator builds a single query whose
 * query string is the list of distinct question keywords, separated by
 * spaces.</p>
 *
 * <p>This class extends the class <code>QueryGenerator</code>.</p>
 *
 * @author Nico Schlaefer
 * @version 2007-07-11
 */
public class BagOfWordsG extends QueryGenerator {
    /** Score that every "bag of words" query is created with. */
    private static final float SCORE = 1;

    /** IDs of the answer extraction techniques set on generated queries. */
    private static final String[] EXTRACTION_TECHNIQUES = {
        AnswerTypeFilter.ID,
        AnswerPatternFilter.ID,
        FactoidsFromPredicatesFilter.ID
    };

    /**
     * Joins the question keywords into a query string, keeping only the first
     * occurrence of keywords that share the same normalized form.
     *
     * <p>Keywords are deliberately not expanded. An earlier variant replaced
     * each term by a boolean AND/OR phrase built from the term's keywords and
     * its expansions; it was disabled because such queries are not supported
     * by Web search engines.</p>
     *
     * @param terms terms in the question (currently unused, only the disabled
     *              expansion variant read them)
     * @param kws keywords in the question
     * @return query string consisting of the distinct keywords
     */
    private String getQueryString(Term[] terms, String[] kws) {
        // normalized forms that already contributed a keyword to the query
        HashSet<String> seenNormalized = new HashSet<String>();
        // keywords in their original form, in order of first occurrence
        ArrayList<String> uniqueKeywords = new ArrayList<String>();

        for (String keyword : kws) {
            String normalized = StringUtils.normalize(keyword);
            // add() returns false if the normalized form was seen before
            boolean firstOccurrence = seenNormalized.add(normalized);
            if (firstOccurrence) {
                uniqueKeywords.add(keyword);
            }
        }

        String[] keywordArray =
            uniqueKeywords.toArray(new String[uniqueKeywords.size()]);
        return StringUtils.concatWithSpaces(keywordArray);
    }

    /**
     * Generates a "bag of words" query from the keywords in the question
     * string.
     *
     * <p>No query is generated for a factoid question that has neither answer
     * types nor predicates.</p>
     *
     * @param aq analyzed question
     * @return array holding the single generated <code>Query</code>, or an
     *         empty array if no query is generated
     */
    public Query[] generateQueries(AnalyzedQuestion aq) {
        String[] answerTypes = aq.getAnswerTypes();
        Predicate[] predicates = aq.getPredicates();

        // a query is only worthwhile if the answer type is known, predicates
        // could be extracted or the question is not a factoid question
        boolean worthQuerying = answerTypes.length > 0
            || predicates.length > 0
            || !aq.isFactoid();
        if (!worthQuerying) {
            return new Query[0];
        }

        // assemble the query string from the question keywords
        Term[] questionTerms = aq.getTerms();
        String[] keywords = aq.getKeywords();
        String queryString = getQueryString(questionTerms, keywords);

        // wrap it in a query and attach the extraction techniques
        Query bagOfWords = new Query(queryString, aq, SCORE);
        bagOfWords.setExtractionTechniques(EXTRACTION_TECHNIQUES);

        return new Query[] { bagOfWords };
    }
}