package info.ephyra.questionanalysis;
import info.ephyra.io.Logger;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.indices.WordFrequencies;
import info.ephyra.nlp.semantics.Predicate;
import info.ephyra.nlp.semantics.ontologies.Ontology;
import info.ephyra.questionanalysis.atype.AnswerType;
import info.ephyra.questionanalysis.atype.FocusFinder;
import info.ephyra.questionanalysis.atype.QuestionClassifier;
import info.ephyra.questionanalysis.atype.QuestionClassifierFactory;
import info.ephyra.util.Dictionary;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.cmu.lti.javelin.util.Language;
import edu.cmu.lti.util.Pair;
/**
* Analyzes a question string:
* <ul>
* <li>normalizes the question</li>
* <li>stems verbs and nouns</li>
* <li>resolves verb constructions with auxiliaries</li>
* <li>extracts keywords</li>
* <li>extracts named entities</li>
* <li>extracts and expands terms</li>
* <li>determines focus word and expected answer types</li>
* <li>interprets the question using question patterns</li>
* <li>extracts predicate-argument structures</li>
* </ul>
*
* @author Nico Schlaefer
* @version 2008-01-23
*/
public class QuestionAnalysis {
    /**
     * Matches an underscore followed by a single word character. Compiled
     * once because it is applied repeatedly to every answer type name.
     */
    private static final Pattern UNDERSCORE_WORD_CHAR =
        Pattern.compile("_(\\w)");

    /** <code>Dictionaries</code> for term extraction. */
    private static final List<Dictionary> dicts = new ArrayList<Dictionary>();

    /** <code>Ontologies</code> for term expansion. */
    private static final List<Ontology> ontologies = new ArrayList<Ontology>();

    /**
     * <code>Question Classifier</code> for determining the answer type.
     * Remains <code>null</code> if the initialization below fails.
     */
    private static QuestionClassifier qc;
    static {
        try {
            qc = QuestionClassifierFactory.getInstance(
                new Pair<Language,Language>(Language.valueOf("en_US"),
                                            Language.valueOf("en_US")));
        } catch (Exception e) {
            // best effort: the failure is reported and the classifier is left
            // unset, in which case no answer types are determined
            e.printStackTrace();
        }
    }

    /** String providing additional contextual information on the question. */
    private static String context = "";

    /** Predicates used instead of extracting predicates from the question. */
    private static Predicate[] predicates;

    /**
     * Registers a <code>Dictionary</code>.
     *
     * @param dict a dictionary
     */
    public static void addDictionary(Dictionary dict) {
        dicts.add(dict);
    }

    /**
     * Registers an <code>Ontology</code>.
     *
     * @param ontology an ontology
     */
    public static void addOntology(Ontology ontology) {
        ontologies.add(ontology);
    }

    /**
     * Returns the <code>Dictionaries</code> as a newly allocated array.
     *
     * @return dictionaries
     */
    public static Dictionary[] getDictionaries() {
        return dicts.toArray(new Dictionary[dicts.size()]);
    }

    /**
     * Returns the <code>Ontologies</code> as a newly allocated array.
     *
     * @return ontologies
     */
    public static Ontology[] getOntologies() {
        return ontologies.toArray(new Ontology[ontologies.size()]);
    }

    /**
     * Unregisters all <code>Dictionaries</code>.
     */
    public static void clearDictionaries() {
        dicts.clear();
    }

    /**
     * Unregisters all <code>Ontologies</code>.
     */
    public static void clearOntologies() {
        ontologies.clear();
    }

    /**
     * Sets the context string.
     *
     * @param context context string
     */
    public static void setContext(String context) {
        QuestionAnalysis.context = context;
    }

    /**
     * Returns the context string.
     *
     * @return context string
     */
    public static String getContext() {
        return context;
    }

    /**
     * Clears the context string.
     */
    public static void clearContext() {
        context = "";
    }

    /**
     * Sets predicates that are used instead of extracting predicates from the
     * question.
     *
     * @param predicates the predicates
     */
    public static void setPredicates(Predicate[] predicates) {
        QuestionAnalysis.predicates = predicates;
    }

    /**
     * Returns the predicates.
     *
     * @return the predicates, or <code>null</code> if none are set
     */
    public static Predicate[] getPredicates() {
        return predicates;
    }

    /**
     * Clears the predicates.
     */
    public static void clearPredicates() {
        predicates = null;
    }

    /**
     * Determines the answer types of a question with the question classifier
     * and converts their names to the NE type naming scheme (see
     * {@link #toNeTypeName(String)}). Answer types whose full type is
     * "NONE" are dropped.
     *
     * @param question question string
     * @return converted answer type names in the order returned by the
     *         classifier, empty if the classification failed
     */
    private static String[] getAtypes(String question) {
        List<AnswerType> atypes = null;
        try {
            atypes = qc.getAnswerTypes(question);
        } catch (Exception e) {
            // any failure of the classifier (including a classifier that could
            // not be initialized) is reported and yields no answer types
            e.printStackTrace();
        }
        if (atypes == null) {
            return new String[0];
        }

        // collect into a fresh list instead of removing from the classifier's
        // list: this drops every "NONE" entry (not just the first of several
        // equal ones) and does not require the returned list to be modifiable
        List<String> names = new ArrayList<String>(atypes.size());
        for (AnswerType atype : atypes) {
            String fullType = atype.getFullType(-1);
            if ("NONE".equals(fullType)) {
                continue;
            }
            names.add(toNeTypeName(fullType));
        }
        return names.toArray(new String[names.size()]);
    }

    /**
     * Converts the full name of an answer type to the NE type naming scheme:
     * the name is lowercased, each level of the type hierarchy is prefixed
     * with "NE", levels are separated by "->" instead of "." and an underscore
     * followed by a word character is replaced by the uppercased character,
     * e.g. "LOCATION.CITY" becomes "NElocation->NEcity" and "PROPER_NAME"
     * becomes "NEproperName".
     *
     * @param fullType full name of an answer type
     * @return converted type name
     */
    private static String toNeTypeName(String fullType) {
        // a fixed locale keeps the conversion independent of the default
        // locale (e.g. "I" is not lowercased to "i" in a Turkish locale)
        String lower = fullType.toLowerCase(Locale.ENGLISH);
        StringBuilder sb =
            new StringBuilder("NE" + lower.replace(".", "->NE"));

        // restart the search after each replacement so that a match created
        // by a previous replacement (runs of underscores) is handled as well
        Matcher m = UNDERSCORE_WORD_CHAR.matcher(sb);
        while (m.find()) {
            String upper = m.group(1).toUpperCase(Locale.ENGLISH);
            sb.replace(m.start(), m.end(), upper);
            m.reset(sb);
        }
        return sb.toString();
    }

    /**
     * Analyzes a question string. Named entities, the focus word and the
     * answer types are derived from the original question string, keywords
     * and terms from the question with resolved auxiliary constructions, and
     * interpretations from the normalized and the stemmed question. If
     * predicates have been set with {@link #setPredicates(Predicate[])}, they
     * are used instead of predicates extracted from the question.
     *
     * @param question question string
     * @return analyzed question
     */
    public static AnalyzedQuestion analyze(String question) {
        // normalize question
        String qn = QuestionNormalizer.normalize(question);

        // stem verbs and nouns
        String stemmed = QuestionNormalizer.stemVerbsAndNouns(qn);
        MsgPrinter.printNormalization(stemmed);
        Logger.logNormalization(stemmed);

        // resolve verb constructions with auxiliaries
        String verbMod = (QuestionNormalizer.handleAuxiliaries(qn))[0];
        // TODO return only one best string

        // extract keywords
        String[] kws = KeywordExtractor.getKeywords(verbMod, context);

        // extract named entities
        String[][] nes = TermExtractor.getNes(question, context);

        // extract terms and set relative frequencies
        Term[] terms = TermExtractor.getTerms(verbMod, context, nes,
                dicts.toArray(new Dictionary[dicts.size()]));
        for (Term term : terms) {
            term.setRelFrequency(WordFrequencies.lookupRel(term.getText()));
        }

        // extract focus word
        String focus = FocusFinder.findFocusWord(question);

        // determine answer types
        //String[] ats = AnswerTypeTester.getAnswerTypes(qn, stemmed);
        String[] ats = getAtypes(question);
        MsgPrinter.printAnswerTypes(ats);
        Logger.logAnswerTypes(ats);

        // interpret question
        QuestionInterpretation[] qis =
            QuestionInterpreter.interpret(qn, stemmed);
        MsgPrinter.printInterpretations(qis);
        Logger.logInterpretations(qis);

        // extract predicates (unless predicates have been set explicitly)
        Predicate[] ps = (predicates != null) ? predicates
            : PredicateExtractor.getPredicates(qn, verbMod, ats, terms);
        MsgPrinter.printPredicates(ps);
        Logger.logPredicates(ps);

        // expand terms
        TermExpander.expandTerms(terms, ps,
                ontologies.toArray(new Ontology[ontologies.size()]));

        return new AnalyzedQuestion(question, qn, stemmed, verbMod, kws, nes,
                terms, focus, ats, qis, ps);
    }

    /**
     * Prints the answer types of the question that is passed as the first
     * command line argument.
     *
     * @param args command line arguments, the first one being the question
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.err.println("Usage: java "
                    + "info.ephyra.questionanalysis.QuestionAnalysis "
                    + "<question>");
            return;
        }

        String[] atypes = getAtypes(args[0]);
        System.out.println(args[0]);
        for (String atype : atypes) {
            System.out.println(atype);
        }
        System.out.println("Done!");
    }
}