package info.ephyra.trec;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.OpenNLP;
import info.ephyra.nlp.SnowballStemmer;
import info.ephyra.nlp.StanfordParser;
import info.ephyra.questionanalysis.QuestionNormalizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This class resolves references within a question string to the target
* description, previous questions or previous answers as required since the
* TREC 13 QA track.
*
* @author Petra Gieselmann, Nico Schlaefer, Manas Pathak
* @version 2007-07-20
*/
public class CorefResolver {
/**
* Regular expression for English singular third person personal pronouns
* for persons ("he", "she", "him"). The embedded <code>(?i)</code> flag makes
* the match case-insensitive and <code>\b</code> restricts it to whole words.
*/
public static final String singularThirdPersonPronounString = "((?i)(\\bhe\\b)|(\\bshe\\b)|(\\bhim\\b))";
/**
* Whole-string pattern built from {@link #singularThirdPersonPronounString}:
* an optional prefix ending in a space, the pronoun expression (capturing
* group 2) and an optional remainder.
*/
public static final Pattern singularThirdPersonPronounPattern = Pattern
.compile("(.* )?".concat(singularThirdPersonPronounString).concat(
"(.*)?"));
/**
* Regular expression for English singular third person personal pronouns
* for things ("it"), case-insensitive, whole words only.
*/
public static final String singularThirdThingPronounString = "((?i)(\\bit\\b))";
/**
* Whole-string pattern built from {@link #singularThirdThingPronounString}:
* optional prefix ending in a space, pronoun (group 2), optional remainder.
*/
public static final Pattern singularThirdThingPronounPattern = Pattern
.compile("(.* )?".concat(singularThirdThingPronounString).concat(
"(.*)?"));
/**
* Regular expression for English plural third person personal pronouns
* ("they", "them"), case-insensitive, whole words only.
*/
public static final String pluralThirdPersonPronounString = "((?i)(\\bthey\\b)|(\\bthem\\b))";
/**
* Whole-string pattern built from {@link #pluralThirdPersonPronounString}:
* optional prefix ending in a space, pronoun (group 2), optional remainder.
*/
public static final Pattern pluralThirdPersonPronounPattern = Pattern
.compile("(.* )?".concat(pluralThirdPersonPronounString).concat(
"(.*)?"));
/**
* Regular expression for English singular third person possessive pronouns
* for persons ("his", "hers"), case-insensitive, whole words only.
*/
public static final String singularThirdPersonPronounStringGen = "((?i)(\\bhis\\b)|(\\bhers\\b))";
/**
* Whole-string pattern built from
* {@link #singularThirdPersonPronounStringGen}: optional prefix ending in a
* space, pronoun (group 2), optional remainder.
*/
public static final Pattern singularThirdPersonPronounPatternGen = Pattern
.compile("(.* )?".concat(singularThirdPersonPronounStringGen)
.concat("(.*)?"));
/**
* Regular expression for English singular third person possessive pronouns
* for things ("its"), case-insensitive, whole words only.
*/
public static final String singularThirdThingPronounStringGen = "((?i)(\\bits\\b))";
/**
* Whole-string pattern built from
* {@link #singularThirdThingPronounStringGen}: optional prefix ending in a
* space, pronoun (group 2), optional remainder.
*/
public static final Pattern singularThirdThingPronounPatternGen = Pattern
.compile("(.* )?".concat(singularThirdThingPronounStringGen)
.concat("(.*)?"));
/**
* Regular expression for English plural third person possessive pronouns
* ("their", "theirs"), case-insensitive, whole words only.
*/
public static final String pluralThirdPersonPronounStringGen = "((?i)(\\btheir\\b)|(\\btheirs\\b))";
/**
* Whole-string pattern built from {@link #pluralThirdPersonPronounStringGen}:
* optional prefix ending in a space, pronoun (group 2), optional remainder.
*/
public static final Pattern pluralThirdPersonPronounPatternGen = Pattern
.compile("(.* )?".concat(pluralThirdPersonPronounStringGen).concat(
"(.*)?"));
/**
* Regular expression for the English singular third person pronoun "her",
* which is ambiguous between personal and possessive use; case-insensitive,
* whole words only.
*/
public static final String singularThirdPersonPronounStringAmb = "((?i)(\\bher\\b))";
/**
* Whole-string pattern built from
* {@link #singularThirdPersonPronounStringAmb}: optional prefix ending in a
* space, pronoun (group 2), optional remainder.
*/
public static final Pattern singularThirdPersonPronounPatternAmb = Pattern
.compile("(.* )?".concat(singularThirdPersonPronounStringAmb)
.concat("(.*)?"));
/**
* Regular expression for the English singular demonstrative pronoun
* ("this"), case-insensitive, whole words only.
*/
public static final String singularDemPronounString = "((?i)(\\bthis\\b))";
/**
* Whole-string pattern built from {@link #singularDemPronounString}:
* optional prefix ending in a space, pronoun (group 2), optional remainder.
*/
public static final Pattern singularDemPronounPattern = Pattern
.compile("(.* )?".concat(singularDemPronounString).concat("(.*)?"));
/**
* Regular expression for the English plural demonstrative pronouns
* ("those", "these"), case-insensitive, whole words only.
*/
public static final String pluralDemPronounString = "((?i)(\\bthose\\b)|(\\bthese\\b))";
/**
* Whole-string pattern built from {@link #pluralDemPronounString}:
* optional prefix ending in a space, pronoun (group 2), optional remainder.
*/
public static final Pattern pluralDemPronounPattern = Pattern
.compile("(.* )?".concat(pluralDemPronounString).concat("(.*)?"));
/**
* Regular expression for targets: one or more ASCII letters or whitespace
* characters and nothing else.
*/
public static final String verifyTargetString = "[a-zA-Z\\s]+";
/** Compiled form of {@link #verifyTargetString}. */
public static final Pattern verifyTargetPattern = Pattern
.compile(verifyTargetString);//.concat("(.*)?"));
/**
 * Resolves references ONLY to the target description. This method is called
 * once for each factoid and list question in the series.
 * <p>
 * The pronoun class that occurs first in the question is determined and only
 * the first occurrence of that class is replaced by the target (or by its
 * genitive for possessive use). Pronouns that refer to persons ("he", "his",
 * "her", ...) are only replaced if {@code isTargetPerson} accepts the target.
 * The rewritten question is stored back into the question object and passed
 * to {@code MsgPrinter.printResolvedQuestion}.
 *
 * @param target
 *            the question series including answers to previous questions
 * @param next
 *            the next question in the series to be answered
 */
public static void resolvePronounsToTarget(TRECTarget target, int next) {
    String currentTarget = target.getCondensedTarget();
    TRECQuestion[] questions = target.getQuestions();
    String currentQuestionString = questions[next].getQuestionString();

    // non-null iff the target looks like a person name
    String personName = isTargetPerson(currentTarget);
    boolean personFlag = personName != null;
    String currentTargetPerson = personFlag ? personName : currentTarget;

    // tokenized target, used to drop repeated target words after "this"/"these"
    String[] tokens = OpenNLP.tokenize(currentTarget);

    // genitive forms of the target
    String currentTargetGen = possessiveForm(currentTarget);
    String currentTargetPersonGen = possessiveForm(currentTargetPerson);

    // these classes additionally require the whole-string pattern to match
    Matcher her = singularThirdPersonPronounPatternAmb
            .matcher(currentQuestionString);
    Matcher sgdem = singularDemPronounPattern.matcher(currentQuestionString);
    Matcher pldem = pluralDemPronounPattern.matcher(currentQuestionString);

    /*
     * Determine which pronoun class occurs first in the question. Ties are
     * resolved in favour of the class listed first.
     */
    String[] labels = { "sgpers", "sgthing", "plpers", "sgposs",
            "sgthingposs", "plposs", "her", "sgdem", "pldem" };
    String[] expressions = { singularThirdPersonPronounString,
            singularThirdThingPronounString,
            pluralThirdPersonPronounString,
            singularThirdPersonPronounStringGen,
            singularThirdThingPronounStringGen,
            pluralThirdPersonPronounStringGen,
            singularThirdPersonPronounStringAmb, singularDemPronounString,
            pluralDemPronounString };
    String firstPronoun = "";
    int firstIndex = Integer.MAX_VALUE;
    for (int i = 0; i < labels.length; i++) {
        int start = firstPronounStart(currentQuestionString, expressions[i]);
        if (start >= 0 && start < firstIndex) {
            firstPronoun = labels[i];
            firstIndex = start;
        }
    }

    /*
     * Replace the first occurrence of the winning pronoun class. The target
     * text is quoted so that '$' and '\' in it are inserted literally.
     */
    if (personFlag && firstPronoun.equals("sgposs")) {
        currentQuestionString = currentQuestionString.replaceFirst(
                singularThirdPersonPronounStringGen,
                Matcher.quoteReplacement(currentTargetPersonGen));
    } else if (firstPronoun.equals("sgthingposs")) {
        currentQuestionString = currentQuestionString.replaceFirst(
                singularThirdThingPronounStringGen,
                Matcher.quoteReplacement(currentTargetGen));
    } else if (firstPronoun.equals("plposs")) {
        currentQuestionString = currentQuestionString.replaceFirst(
                pluralThirdPersonPronounStringGen,
                Matcher.quoteReplacement(currentTargetGen));
    } else if (personFlag && firstPronoun.equals("her") && her.matches()) {
        // "her" is possessive if the word that follows it is tagged as a noun
        String rest = tailAfterFirstPronoun(currentQuestionString,
                singularThirdPersonPronounStringAmb);
        String[] pos = OpenNLP.tagPos(OpenNLP.tokenize(rest));
        boolean possessive = pos != null && pos.length > 0
                && pos[0].equalsIgnoreCase("NN");
        String replacement = possessive ? currentTargetPersonGen
                : currentTargetPerson;
        currentQuestionString = currentQuestionString.replaceFirst(
                singularThirdPersonPronounStringAmb,
                Matcher.quoteReplacement(replacement));
    } else if (firstPronoun.equals("sgdem") && sgdem.matches()) {
        currentQuestionString = replaceFirstDemonstrative(
                currentQuestionString, singularDemPronounString, tokens,
                currentTarget, currentTargetGen);
    } else if (firstPronoun.equals("pldem") && pldem.matches()) {
        currentQuestionString = replaceFirstDemonstrative(
                currentQuestionString, pluralDemPronounString, tokens,
                currentTarget, currentTargetGen);
    } else if (personFlag && firstPronoun.equals("sgpers")) {
        currentQuestionString = currentQuestionString.replaceFirst(
                singularThirdPersonPronounString,
                Matcher.quoteReplacement(currentTargetPerson));
    } else if (firstPronoun.equals("sgthing")) {
        currentQuestionString = currentQuestionString.replaceFirst(
                singularThirdThingPronounString,
                Matcher.quoteReplacement(currentTarget));
    } else if (firstPronoun.equals("plpers")) {
        currentQuestionString = currentQuestionString.replaceFirst(
                pluralThirdPersonPronounString,
                Matcher.quoteReplacement(currentTarget));
    }

    questions[next].setQuestionString(currentQuestionString);
    MsgPrinter.printResolvedQuestion(currentQuestionString);
}

/** Builds the English genitive of a phrase: "'" after a final "s", "'s" otherwise. */
private static String possessiveForm(String phrase) {
    return phrase.endsWith("s") ? phrase.concat("'") : phrase.concat("'s");
}

/** Returns the start index of the first match of the expression, or -1 if there is none. */
private static int firstPronounStart(String question, String pronounRegex) {
    Matcher m = Pattern.compile(pronounRegex).matcher(question);
    return m.find() ? m.start() : -1;
}

/**
 * Returns the lower-cased text that follows the first match of the expression,
 * skipping the one separator character after it; empty if nothing follows.
 */
private static String tailAfterFirstPronoun(String question, String pronounRegex) {
    Matcher m = Pattern.compile(pronounRegex).matcher(question);
    if (!m.find()) {
        return "";
    }
    int from = Math.min(m.end() + 1, question.length());
    return question.substring(from).toLowerCase();
}

/**
 * Replaces the first demonstrative pronoun: target words repeated after the
 * pronoun are removed and the pronoun becomes the target; a pronoun that is
 * still left afterwards becomes the genitive of the target.
 */
private static String replaceFirstDemonstrative(String question,
        String demRegex, String[] tokens, String target, String targetGen) {
    String rest = tailAfterFirstPronoun(question, demRegex);
    for (String token : tokens) {
        String lower = token.toLowerCase();
        if (rest.contains(lower)) {
            // the token is matched literally, not interpreted as a regex
            String literal = Pattern.quote(lower);
            question = question.replaceFirst(" " + literal + "\\b", "");
            question = question.replaceFirst("\\b" + literal + " ", "");
            question = question.replaceFirst(demRegex,
                    Matcher.quoteReplacement(target));
        }
    }
    return question.replaceFirst(demRegex,
            Matcher.quoteReplacement(targetGen));
}
/**
 * Heuristically decides whether a target description is a person name.
 * <p>
 * A target is accepted if it consists of letters and whitespace only, is not
 * written without any lower-case letter, and is made up of at most one leading
 * word that starts with a lower-case letter followed by one or more words that
 * do not. Surrounding whitespace is ignored.
 *
 * @param currentTarget the target description
 * @return the accepted words without the leading lower-case word, joined by
 *         single blanks, or <code>null</code> if the target is rejected
 */
private static String isTargetPerson(String currentTarget) {
    if (!verifyTargetPattern.matcher(currentTarget).matches()) {
        return null;
    }
    if (isAllUpper(currentTarget)) {
        return null;
    }
    // trim first: a leading blank would otherwise produce an empty first word
    String[] words = currentTarget.trim().split("\\s+");
    int leadingLower = 0;
    boolean sawCapitalized = false;
    for (String word : words) {
        if (Character.isLowerCase(word.charAt(0))) {
            if (sawCapitalized) {
                // a lower-case word after the name has started
                return null;
            }
            leadingLower++;
        } else {
            sawCapitalized = true;
        }
    }
    if (!sawCapitalized || leadingLower > 1) {
        return null;
    }
    StringBuilder name = new StringBuilder();
    for (int i = leadingLower; i < words.length; i++) {
        if (name.length() > 0) {
            name.append(' ');
        }
        name.append(words[i]);
    }
    return name.toString();
}
/**
 * Tells whether a string contains no lower-case character. Characters without
 * case (digits, blanks, ...) do not count as lower-case, so the empty string
 * yields <code>true</code>.
 *
 * @param s the string to inspect
 * @return <code>false</code> iff at least one character is lower-case
 */
private static boolean isAllUpper(String s) {
    for (char ch : s.toCharArray()) {
        if (Character.isLowerCase(ch)) {
            return false;
        }
    }
    return true;
}
/**
 * Resolves references to the target description, previous questions or
 * answers. This method is called once for each factoid and list question in
 * the series.
 * <p>
 * All occurrences of each pronoun class found in the original question are
 * replaced. For singular pronouns the answer to an earlier question of a
 * fitting type may be used as antecedent instead of the target; once that
 * happens the earlier answer (and its genitive) stays the antecedent for the
 * remaining replacements in this question. The rewritten question is stored
 * back into the question object and passed to
 * {@code MsgPrinter.printResolvedQuestion}.
 *
 * @param target
 *            the question series including answers to previous questions
 * @param next
 *            the next question in the series to be answered
 */
public static void resolvePronouns(TRECTarget target, int next) {
    // expected answer types understood by usePreviousAnswer
    final int thing = 1;
    final int person = 2;

    String currentTarget = target.getCondensedTarget();
    TRECQuestion[] questions = target.getQuestions();
    String currentQuestionString = questions[next].getQuestionString();
    // tokenized target
    String[] tokens = OpenNLP.tokenize(currentTarget);
    // genitive of current target
    String currentTargetGen = toGenitive(currentTarget);

    // is target a person? (string content is compared, not identity)
    String[] targetTypes = target.getTargetTypes();
    boolean targetPerson = targetTypes.length == 1
            && "PERSON".equals(targetTypes[0]);
    // is target a thing? Only the entries that actually exist among the
    // first three are inspected, so short type lists no longer fail.
    // NOTE(review): the meaning of the "length != 4" condition is not
    // visible here; it is kept unchanged - confirm with the type tagger.
    boolean targetThing = false;
    if (targetTypes.length != 4) {
        for (int i = 0; i < targetTypes.length && i < 3; i++) {
            if (!"PERSON".equals(targetTypes[i])) {
                targetThing = true;
            }
        }
    }

    /*
     * Resolve personal, possessive and demonstrative pronouns by the target
     * as antecedent. All matchers look at the ORIGINAL question string.
     */
    Matcher sgpers = singularThirdPersonPronounPattern
            .matcher(currentQuestionString);
    Matcher sgthing = singularThirdThingPronounPattern
            .matcher(currentQuestionString);
    Matcher plpers = pluralThirdPersonPronounPattern
            .matcher(currentQuestionString);
    Matcher sgposs = singularThirdPersonPronounPatternGen
            .matcher(currentQuestionString);
    Matcher sgthingposs = singularThirdThingPronounPatternGen
            .matcher(currentQuestionString);
    Matcher plposs = pluralThirdPersonPronounPatternGen
            .matcher(currentQuestionString);
    Matcher her = singularThirdPersonPronounPatternAmb
            .matcher(currentQuestionString);
    Matcher sgdem = singularDemPronounPattern
            .matcher(currentQuestionString);
    Matcher pldem = pluralDemPronounPattern.matcher(currentQuestionString);

    if (sgposs.matches()) {
        // if targetType is a thing, do not use it but a previous answer
        if (targetThing && next != 0) {
            String previous = usePreviousAnswer(questions, next, person);
            if (previous != null) {
                currentTarget = previous;
                currentTargetGen = toGenitive(previous);
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(
                singularThirdPersonPronounStringGen,
                Matcher.quoteReplacement(currentTargetGen));
    }
    if (sgthingposs.matches()) {
        // if targetType is a person, do not use it but a previous answer
        if (targetPerson && next != 0) {
            String previous = usePreviousAnswer(questions, next, thing);
            if (previous != null) {
                currentTarget = previous;
                currentTargetGen = toGenitive(previous);
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(
                singularThirdThingPronounStringGen,
                Matcher.quoteReplacement(currentTargetGen));
    }
    if (plposs.matches()) {
        currentQuestionString = currentQuestionString.replaceAll(
                pluralThirdPersonPronounStringGen,
                Matcher.quoteReplacement(currentTargetGen));
    }
    if (her.matches()) {
        // "her" is possessive if the word that follows it is tagged as a noun
        String rest = textFollowing(currentQuestionString,
                singularThirdPersonPronounStringAmb);
        String[] pos = OpenNLP.tagPos(OpenNLP.tokenize(rest));
        boolean possessive = pos != null && pos.length > 0
                && pos[0].equalsIgnoreCase("NN");
        currentQuestionString = currentQuestionString.replaceAll(
                singularThirdPersonPronounStringAmb,
                Matcher.quoteReplacement(possessive ? currentTargetGen
                        : currentTarget));
    }
    if (sgdem.matches()) {
        currentQuestionString = replaceAllDemonstratives(
                currentQuestionString, singularDemPronounString, tokens,
                currentTarget, currentTargetGen);
    }
    if (pldem.matches()) {
        currentQuestionString = replaceAllDemonstratives(
                currentQuestionString, pluralDemPronounString, tokens,
                currentTarget, currentTargetGen);
    }
    if (sgpers.matches()) {
        // a previous answer is only considered if the target looks plural,
        // is not a thing and this is not the first question
        // NOTE(review): the possessive branch above treats targetThing the
        // other way round; condition kept as found - confirm the intent.
        if (checkPl(tokens) && !targetThing && next != 0) {
            String previous = usePreviousAnswer(questions, next, person);
            if (previous != null) {
                currentTarget = previous;
                currentTargetGen = toGenitive(previous);
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(
                singularThirdPersonPronounString,
                Matcher.quoteReplacement(currentTarget));
    }
    if (sgthing.matches()) {
        // a previous answer is only considered if the target looks plural,
        // is not a person and this is not the first question
        if (checkPl(tokens) && !targetPerson && next != 0) {
            String previous = usePreviousAnswer(questions, next, thing);
            if (previous != null) {
                currentTarget = previous;
                currentTargetGen = toGenitive(previous);
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(
                singularThirdThingPronounString,
                Matcher.quoteReplacement(currentTarget));
    }
    if (plpers.matches()) {
        currentQuestionString = currentQuestionString.replaceAll(
                pluralThirdPersonPronounString,
                Matcher.quoteReplacement(currentTarget));
    }
    questions[next].setQuestionString(currentQuestionString);
    MsgPrinter.printResolvedQuestion(questions[next].getQuestionString());
}

/** Builds the English genitive of a phrase: "'" after a final "s", "'s" otherwise. */
private static String toGenitive(String phrase) {
    return phrase.endsWith("s") ? phrase.concat("'") : phrase.concat("'s");
}

/**
 * Returns the lower-cased text that follows the first match of the expression,
 * skipping the one separator character after it; empty if nothing follows.
 */
private static String textFollowing(String question, String pronounRegex) {
    Matcher m = Pattern.compile(pronounRegex).matcher(question);
    if (!m.find()) {
        return "";
    }
    int from = Math.min(m.end() + 1, question.length());
    return question.substring(from).toLowerCase();
}

/**
 * Replaces all demonstrative pronouns: target words repeated after the first
 * pronoun are removed and the pronouns become the target; pronouns that are
 * still left afterwards become the genitive of the target.
 */
private static String replaceAllDemonstratives(String question,
        String demRegex, String[] tokens, String target, String targetGen) {
    String rest = textFollowing(question, demRegex);
    for (String token : tokens) {
        String lower = token.toLowerCase();
        if (rest.contains(lower)) {
            // the token is matched literally, not interpreted as a regex
            String literal = Pattern.quote(lower);
            question = question.replaceAll(" " + literal + "\\b", "");
            question = question.replaceAll("\\b" + literal + " ", "");
            question = question.replaceAll(demRegex,
                    Matcher.quoteReplacement(target));
        }
    }
    return question.replaceAll(demRegex,
            Matcher.quoteReplacement(targetGen));
}
/**
 * Resolves noun phrases in a question to the target description.
 * <p>
 * The first resolved variant produced by {@code resoveNP} replaces the question
 * string; if there is none, the question is left as it is. The string that
 * ends up in the question object is passed to
 * {@code MsgPrinter.printResolvedQuestion}.
 *
 * @param target
 *            the question series
 * @param next
 *            index of the question in the series to be resolved
 */
public static void resolveNounPhrasesToTarget(TRECTarget target, int next) {
    String targetString = target.getCondensedTarget();
    TRECQuestion[] questionsArray = target.getQuestions();
    String question = questionsArray[next].getQuestionString();
    ArrayList<String> candidates = resoveNP(targetString, question);
    // fall back to the unmodified question when nothing was resolved
    candidates.add(question);
    String resolved = candidates.get(0);
    questionsArray[next].setQuestionString(resolved);
    // report the resolved string, not the original one
    MsgPrinter.printResolvedQuestion(resolved);
}
/**
 * Resolves noun phrases of a question to the noun phrase of a target.
 * <p>
 * Both strings are parsed and their "NP" nodes are collected. Every question
 * NP whose stemmed words all occur in the target NP (see {@code match}) is
 * replaced by the target NP; each replacement yields one candidate question.
 * Nothing is resolved if the target parse contains more than one NP node.
 *
 * @param targetString target
 * @param question question
 * @return list of resolved questions that differ from the original question
 *         (ignoring case); possibly empty
 */
private static ArrayList<String> resoveNP(String targetString, String question) {
    String targetParse = parse(targetString);
    Map<Integer, String> nptarget = find(targetParse, "NP");
    ArrayList<String> temp = new ArrayList<String>();
    // only targets whose parse has at most one NP node are handled
    if (nptarget.size() > 1) {
        return temp;
    }
    String questionParse = parse(question);
    Map<Integer, String> npquestion = find(questionParse, "NP");
    String resolvedq;
    for (Map.Entry<Integer, String> q : npquestion.entrySet()) {
        String npq = q.getValue();
        int iq = q.getKey();
        resolvedq = "";
        // Replace target nouns
        for (Map.Entry<Integer, String> t : nptarget.entrySet()) {
            String npt = t.getValue();
            // question NP is a genitive but the target NP is not: cut the
            // last two characters of the target NP and append a POS node
            if (npq.contains("'") && !npt.contains("'")) {
                npt = npt.substring(0, npt.length() - 2) + ") (POS '";
                // unparse() ends its result with a blank, so trim before
                // testing for a final "s" ("Beatles'" instead of "Beatles's")
                if (unparse(npt).trim().endsWith("s")) {
                    npt += "))";
                } else {
                    npt += "s))";
                }
            }
            // move a leading article from the question NP to the target NP
            if (npq.contains("(DT the)") && !npt.contains("(DT the)")) {
                npq = npq.replaceFirst("\\(NP \\(DT the\\) ", "(NP ");
                // "(DT the) " has 9 characters; the splice position is
                // shifted by that amount so that iq + npq.length() still
                // ends at the end of the original NP
                // NOTE(review): the shift is applied even if replaceFirst
                // did not change npq - confirm this is intended
                iq += 9;
                npt = npt.replaceFirst("\\(NP ", "(NP (DT the) ");
            }
            if (match(npq, npt)) {
                resolvedq = unparse(substitute(questionParse, npt, npq.length(), iq));
                // drop the last three characters of the unparsed text and
                // re-attach the final character of the original question
                char c = question.charAt(question.length() - 1);
                resolvedq = resolvedq.substring(0, resolvedq.length() - 3) + c;
                // undo the token forms for quotes, apostrophes and brackets
                resolvedq = resolvedq.replace("`` ", "\"");
                resolvedq = resolvedq.replace(" ''", "\"");
                resolvedq = resolvedq.replace(" '", "'");
                resolvedq = resolvedq.replace("-LRB- ", "(");
                resolvedq = resolvedq.replace(" -RRB-", ")");
                if (!resolvedq.equalsIgnoreCase(question)) {
                    temp.add(resolvedq);
                }
            }
        }
    }
    return temp;
}
/**
 * Parses a string by delegating to {@code StanfordParser.parse}.
 *
 * @param q the string to parse
 * @return whatever the parser returns for the string
 */
private static String parse(String q) {
    String tree = StanfordParser.parse(q);
    return tree;
}
/**
 * Collects every bracketed node of a parse whose text starts with
 * <code>"(" + POS</code>.
 *
 * @param parse input parsed string
 * @param POS tag the node has to start with
 * @return a <code>Map</code> from the index at which a node starts to the node
 *         text, which runs up to and including its matching closing bracket
 */
private static Map<Integer, String> find(String parse, String POS) {
    Map<Integer, String> nodes = new HashMap<Integer, String>();
    String opener = "(" + POS;
    for (int start = parse.indexOf(opener); start != -1;
            start = parse.indexOf(opener, start + 1)) {
        // walk to the bracket that closes the one at 'start'
        int depth = 0;
        int end = start;
        do {
            char ch = parse.charAt(end++);
            if (ch == '(') {
                depth++;
            } else if (ch == ')') {
                depth--;
            }
        } while (depth != 0);
        nodes.put(start, parse.substring(start, end));
    }
    return nodes;
}
/**
 * Splices a target noun phrase into a parsed question in place of another
 * noun phrase.
 *
 * @param questionParse parsed question string
 * @param npt target noun phrase to insert
 * @param lenq number of characters of the question parse to replace
 * @param iq index in the question parse at which the replaced span starts
 * @return the question parse with the span replaced by the target noun phrase
 */
private static String substitute(String questionParse, String npt, int lenq, int iq) {
    String before = questionParse.substring(0, iq);
    String after = questionParse.substring(iq + lenq);
    return before.concat(npt).concat(after);
}
/**
 * Gets the words back from a parse. The parse is split at blanks; of every
 * piece that contains a closing bracket the text before the first closing
 * bracket is kept, followed by a blank. Pieces without a closing bracket are
 * skipped.
 *
 * @param questionParse parsed string
 * @return the kept words, each followed by a blank
 */
private static String unparse(String questionParse) {
    StringBuilder words = new StringBuilder();
    for (String piece : questionParse.split(" ")) {
        int close = piece.indexOf(')');
        if (close >= 0) {
            words.append(piece, 0, close).append(' ');
        }
    }
    return words.toString();
}
/**
 * Checks whether every word of the first phrase also occurs in the second
 * phrase. Both phrases are unparsed, stripped of "'s" and "'", split at blanks
 * and compared word by word on their stems, ignoring case.
 *
 * @param npq parsed string whose words are looked up
 * @param npt parsed string the words are looked up in
 * @return <code>true</code> iff each word of <code>npq</code> has a
 *         counterpart in <code>npt</code>
 */
private static boolean match(String npq, String npt) {
    String questionWords = unparse(npq).replace("'s", "").replace("'", "");
    String targetWords = unparse(npt).replace("'s", "").replace("'", "");
    for (String questionWord : questionWords.split(" ")) {
        String questionStem = SnowballStemmer.stem(questionWord);
        boolean found = false;
        for (String targetWord : targetWords.split(" ")) {
            String targetStem = SnowballStemmer.stem(targetWord);
            if (questionStem.equalsIgnoreCase(targetStem)) {
                found = true;
                break;
            }
        }
        if (!found) {
            return false;
        }
    }
    return true;
}
/**
 * Checks whether the target is considered plural: it has to consist of exactly
 * one token and that token has to end in "s".
 *
 * @param targetTokens tokens of the target string
 * @return <code>true</code> iff the target is considered plural
 */
private static boolean checkPl(String[] targetTokens) {
    return targetTokens.length == 1 && targetTokens[0].endsWith("s");
}
/*
* getAnswerType returns AnswerType of given question @param stemmed
* question string @return expected answer types
*
*/
// private static String[] getAnswerType(String question){
// String qn = QuestionNormalizer.normalize(question);
// String stemmed = QuestionNormalizer.stemVerbsAndNouns(qn);
// return AnswerTypeTester.getAnswerTypes(qn, stemmed);
// }
/**
 * Checks whether a question asks for a person. The question is normalized and
 * its verbs and nouns are stemmed; it asks for a person if the result contains
 * "who", "whom" or "what"/"which"/"name" followed by a person noun (each as
 * whole words, ignoring case).
 *
 * @param question question string
 * @return <code>true</code> iff one of the person cues is found
 */
private static boolean isAnswerTypePerson(String question) {
    String normalized = QuestionNormalizer.normalize(question);
    String stemmed = QuestionNormalizer.stemVerbsAndNouns(normalized);
    String[] cues = {
            "who",
            "whom",
            "(what|which|name) (.* )?(actor|actress|adventurer|architect|artist|assassin|aunt|author|boy|builder|chairman|chancellor|child|creator|dancer|daughter|designer|developer|dictator|discoverer|emperor|employee|enemy|explorer|father|founder|friend|girl|governor|graduate|guy|head|hostage|husband|individual|inventor|killer|leader|maker|man|member|minister|monarch|mother|murderer|musician|official|opponent|owner|partner|person|personnel|player|politician|president|recipient|ruler|scientist|secretary|sender|singer|slayer|son|student|terrorist|uncle|victim|wife|winner|witness|woman|writer)" };
    for (String cue : cues) {
        Pattern cuePattern = Pattern.compile("\\b" + cue + "\\b",
                Pattern.CASE_INSENSITIVE);
        if (cuePattern.matcher(stemmed).find()) {
            return true;
        }
    }
    return false;
}
/**
 * Checks whether a question asks for a thing. The question is normalized and
 * its verbs and nouns are stemmed; it asks for a thing if the result contains
 * the whole word "what" or "which" (ignoring case).
 *
 * @param question question string
 * @return <code>true</code> iff the cue is found
 */
private static boolean isAnswerTypeThing(String question) {
    String normalized = QuestionNormalizer.normalize(question);
    String stemmed = QuestionNormalizer.stemVerbsAndNouns(normalized);
    String cue = "(what|which)";
    Pattern cuePattern = Pattern.compile("\\b" + cue + "\\b",
            Pattern.CASE_INSENSITIVE);
    return cuePattern.matcher(stemmed).find();
}
/**
 * Looks for an answer to an earlier question that can serve as antecedent.
 * The questions before <code>next</code> are visited from the most recent one
 * backwards. The first answer of a question is returned if its answer string
 * is not <code>null</code> and the question asks for the expected type.
 *
 * @param questions questions of the series
 * @param next index of the current question
 * @param exp expected answer type: 1 = thing, 2 = person
 * @return the answer string found, or <code>null</code> if there is none
 */
private static String usePreviousAnswer(TRECQuestion[] questions, int next,
        int exp) {
    if (questions.length == 0) {
        return null;
    }
    for (int i = next - 1; i >= 0; i--) {
        // only the first answer of the i-th question is considered
        TRECAnswer[] answers = questions[i].getAnswers();
        if (answers.length == 0 || answers[0].getAnswerString() == null) {
            continue;
        }
        boolean typeFits = (exp == 1 && isAnswerTypeThing(questions[i].getQuestionString()))
                || (exp == 2 && isAnswerTypePerson(questions[i].getQuestionString()));
        if (typeFits) {
            return answers[0].getAnswerString();
        }
    }
    return null;
}
/**
 * Demo entry point: creates the OpenNLP POS tagger and tokenizer, loads the
 * targets from <code>res/testdata/trec/trec15questions.xml</code> and prints
 * every question except "Other" before and after pronoun resolution to the
 * target.
 *
 * @param args not used
 * @throws Exception if loading or processing the targets fails
 */
public static void main(String[] args) throws Exception {
    try {
        OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
                "res/nlp/postagger/opennlp/tagdict");
        OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz");
    } catch (Exception e) {
        // a failed model load is reported, the run continues regardless
        e.printStackTrace();
    }
    TRECTarget[] targets = TREC13To16Parser.loadTargets("res/testdata/trec/trec15questions.xml");
    for (TRECTarget target : targets) {
        TargetPreprocessor.preprocess(target);
        String heading = "Target: " + target.getCondensedTarget();
        // rule of '=' characters as wide as the heading
        StringBuilder rule = new StringBuilder();
        for (int k = 0; k < heading.length(); k++) {
            rule.append("=");
        }
        System.out.println(rule);
        System.out.println(heading);
        System.out.println(rule);
        for (int i = 0; i < target.getQuestions().length; i++) {
            if (target.getQuestions()[i].getQuestionString().equals("Other")) {
                continue;
            }
            System.out.println("Question: " + target.getQuestions()[i].getQuestionString());
            CorefResolver.resolvePronounsToTarget(target, i);
            System.out.println("Resolved: " + target.getQuestions()[i].getQuestionString());
        }
        System.out.println();
    }
}
}