QuestionAnalysis.java example

Explorer
lucida-master
- lucida
package info.ephyra.questionanalysis;

import info.ephyra.io.Logger;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.indices.WordFrequencies;
import info.ephyra.nlp.semantics.Predicate;
import info.ephyra.nlp.semantics.ontologies.Ontology;
import info.ephyra.questionanalysis.atype.AnswerType;
import info.ephyra.questionanalysis.atype.FocusFinder;
import info.ephyra.questionanalysis.atype.QuestionClassifier;
import info.ephyra.questionanalysis.atype.QuestionClassifierFactory;
import info.ephyra.util.Dictionary;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.cmu.lti.javelin.util.Language;
import edu.cmu.lti.util.Pair;

/**
 * Analyzes a question string:
 * <ul>
 * <li>normalizes the question</li>
 * <li>stems verbs and nouns</li>
 * <li>resolves verb constructions with auxiliaries</li>
 * <li>extracts keywords</li>
 * <li>extracts named entities</li>
 * <li>extracts and expands terms</li>
 * <li>determines focus word and expected answer types</li>
 * <li>interprets the question using question patterns</li>
 * <li>extracts predicate-argument structures</li>
 * </ul>
 * 
 * <p>All configuration (dictionaries, ontologies, context string, preset
 * predicates) is kept in static fields and is accessed without any
 * synchronization.</p>
 * 
 * @author Nico Schlaefer
 * @version 2008-01-23
 */
public class QuestionAnalysis {
	/** Full type name of answer types that are dropped from the result. */
	private static final String NO_ANSWER_TYPE = "NONE";
	/**
	 * Matches an underscore followed by a single word character; compiled
	 * once because it is applied to every answer type of every question.
	 */
	private static final Pattern UNDERSCORE_WORD_CHAR =
		Pattern.compile("_(\\w)");
	
	/** <code>Dictionaries</code> for term extraction. */
	private static final List<Dictionary> dicts = new ArrayList<Dictionary>();
	/** <code>Ontologies</code> for term expansion. */
	private static final List<Ontology> ontologies = new ArrayList<Ontology>();
	/**
	 * <code>Question Classifier</code> for determining the answer type. Stays
	 * <code>null</code> if the classifier could not be created.
	 */
	private static QuestionClassifier qc;
	static {
		try {
			qc = QuestionClassifierFactory.getInstance(
				new Pair<Language,Language>(Language.valueOf("en_US"),
						Language.valueOf("en_US")));
		} catch (Exception e) {
			// best effort: the class stays usable, getAtypes() then yields no
			// answer types
			e.printStackTrace();
		}
	}
	/** String providing additional contextual information on the question. */
	private static String context = "";
	/** Predicates used instead of extracting predicates from the question. */
	private static Predicate[] predicates;
	
	/**
	 * Registers a <code>Dictionary</code>.
	 * 
	 * @param dict a dictionary
	 */
	public static void addDictionary(Dictionary dict) {
		dicts.add(dict);
	}
	
	/**
	 * Registers an <code>Ontology</code>.
	 * 
	 * @param ontology an ontology
	 */
	public static void addOntology(Ontology ontology) {
		ontologies.add(ontology);
	}
	
	/**
	 * Returns the <code>Dictionaries</code> as a newly allocated array, in
	 * registration order.
	 * 
	 * @return dictionaries
	 */
	public static Dictionary[] getDictionaries() {
		return dicts.toArray(new Dictionary[dicts.size()]);
	}
	
	/**
	 * Returns the <code>Ontologies</code> as a newly allocated array, in
	 * registration order.
	 * 
	 * @return ontologies
	 */
	public static Ontology[] getOntologies() {
		return ontologies.toArray(new Ontology[ontologies.size()]);
	}
	
	/**
	 * Unregisters all <code>Dictionaries</code>.
	 */
	public static void clearDictionaries() {
		dicts.clear();
	}
	
	/**
	 * Unregisters all <code>Ontologies</code>.
	 */
	public static void clearOntologies() {
		ontologies.clear();
	}
	
	/**
	 * Sets the context string.
	 * 
	 * @param context context string
	 */
	public static void setContext(String context) {
		QuestionAnalysis.context = context;
	}
	
	/**
	 * Returns the context string.
	 * 
	 * @return context string
	 */
	public static String getContext() {
		return context;
	}
	
	/**
	 * Clears the context string, i.e. resets it to the empty string.
	 */
	public static void clearContext() {
		context = "";
	}
	
	/**
	 * Sets predicates that are used instead of extracting predicates from the
	 * question.
	 * 
	 * @param predicates the predicates
	 */
	public static void setPredicates(Predicate[] predicates) {
		QuestionAnalysis.predicates = predicates;
	}
	
	/**
	 * Returns the predicates. The stored array itself is returned, not a
	 * copy.
	 * 
	 * @return the predicates, or <code>null</code> if none are set
	 */
	public static Predicate[] getPredicates() {
		return predicates;
	}
	
	/**
	 * Clears the predicates, so that {@link #analyze(String)} extracts them
	 * from the question again.
	 */
	public static void clearPredicates() {
		predicates = null;
	}
	
	/**
	 * Converts a full answer type name into the naming scheme used for the
	 * answer types of an analyzed question: the name is lower-cased, every
	 * "." becomes "->NE", "NE" is prepended and each "_x" is collapsed into
	 * an upper-case "X". For example <code>PROPER_NAME.PERSON</code> becomes
	 * <code>NEproperName->NEperson</code>.
	 * 
	 * @param fullType full type name as reported by the classifier
	 * @return converted type name
	 */
	private static String toNeTypeName(String fullType) {
		// Locale.ROOT keeps the conversion independent of the default locale
		// (e.g. a Turkish locale would lower-case "I" to a dotless i)
		String lower = fullType.toLowerCase(Locale.ROOT);
		StringBuilder sb =
			new StringBuilder("NE" + lower.replace(".", "->NE"));
		
		// The scan restarts after every replacement: "\w" also matches "_",
		// so a run of underscores is consumed one underscore per pass.
		Matcher m = UNDERSCORE_WORD_CHAR.matcher(sb);
		while (m.find()) {
			sb.replace(m.start(), m.end(),
					m.group(1).toUpperCase(Locale.ROOT));
			m.reset();
		}
		return sb.toString();
	}
	
	/**
	 * Determines the answer types of a question with the question classifier
	 * and converts their names (see {@link #toNeTypeName(String)}). Answer
	 * types whose full type is "NONE" are dropped. If the classifier is
	 * unavailable or fails, no answer types are returned.
	 * 
	 * @param question question string
	 * @return converted answer type names, possibly empty but never
	 *         <code>null</code>
	 */
	private static String[] getAtypes (String question) {
		List<AnswerType> atypes = null;
		// qc is null if the static initializer failed (already reported there)
		if (qc != null) {
			try {
				atypes = qc.getAnswerTypes(question);
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		if (atypes == null) return new String[0];
		
		// Collect into a fresh list instead of removing from the classifier's
		// list: this drops every "NONE" entry regardless of how AnswerType
		// implements equals() and leaves the returned list untouched.
		List<String> names = new ArrayList<String>(atypes.size());
		for (AnswerType atype : atypes) {
			// NOTE(review): -1 is passed through unchanged from the original
			// code; its meaning is defined by AnswerType.getFullType
			String fullType = atype.getFullType(-1);
			if (fullType == null || NO_ANSWER_TYPE.equals(fullType)) continue;
			names.add(toNeTypeName(fullType));
		}
		return names.toArray(new String[names.size()]);
	}
	
	/**
	 * Analyzes a question string.
	 * 
	 * @param question question string
	 * @return analyzed question
	 */
	public static AnalyzedQuestion analyze(String question) {
		// normalize question
		String qn = QuestionNormalizer.normalize(question);
		
		// stem verbs and nouns
		String stemmed = QuestionNormalizer.stemVerbsAndNouns(qn);
		MsgPrinter.printNormalization(stemmed);
		Logger.logNormalization(stemmed);
		
		// resolve verb constructions with auxiliaries (only the first
		// candidate is used)
		String verbMod = (QuestionNormalizer.handleAuxiliaries(qn))[0];
		// TODO return only one best string
		
		// extract keywords
		String[] kws = KeywordExtractor.getKeywords(verbMod, context);
		
		// extract named entities
		String[][] nes = TermExtractor.getNes(question, context);
		
		// extract terms and set relative frequencies
		Term[] terms = TermExtractor.getTerms(verbMod, context, nes,
				getDictionaries());
		for (Term term : terms) {
			term.setRelFrequency(WordFrequencies.lookupRel(term.getText()));
		}
		
		// extract focus word
		String focus = FocusFinder.findFocusWord(question);
		
		// determine answer types (the classifier is used here in place of
		// AnswerTypeTester.getAnswerTypes(qn, stemmed))
		String[] ats = getAtypes(question);
		MsgPrinter.printAnswerTypes(ats);
		Logger.logAnswerTypes(ats);
		
		// interpret question
		QuestionInterpretation[] qis =
			QuestionInterpreter.interpret(qn, stemmed);
		MsgPrinter.printInterpretations(qis);
		Logger.logInterpretations(qis);
		
		// extract predicates, unless predicates have been preset
		Predicate[] ps = (predicates != null) ? predicates
				: PredicateExtractor.getPredicates(qn, verbMod, ats, terms);
		MsgPrinter.printPredicates(ps);
		Logger.logPredicates(ps);
		
		// expand terms
		TermExpander.expandTerms(terms, ps, getOntologies());
		
		return new AnalyzedQuestion(question, qn, stemmed, verbMod, kws, nes,
				terms, focus, ats, qis, ps);
	}
	
	/**
	 * Prints the question given as first argument, followed by its answer
	 * types (one per line) and "Done!".
	 * 
	 * @param args command line arguments; <code>args[0]</code> is the question
	 */
	public static void main (String[] args) {
		if (args.length == 0) {
			System.err.println("Usage: java QuestionAnalysis <question>");
			return;
		}
		
		String[] atypes = getAtypes(args[0]);
		System.out.println(args[0]);
		for (String atype : atypes) {
			System.out.println(atype);
		}
		System.out.println("Done!");
	}
}