package arkref.analysis;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import arkref.analysis.ARKref.Opts;
import arkref.data.Document;
import arkref.data.Mention;
import arkref.data.Sentence;
import arkref.parsestuff.AnalysisUtilities;
import arkref.parsestuff.TregexPatternFactory;
import arkref.parsestuff.U;
import com.aliasi.util.Strings;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
public class Resolve {
/**
 * Resolves every mention of the document, in the order returned by
 * {@code d.mentions()}, and prints a trace of each decision.
 *
 * <p>For each mention that has a parse node, the first applicable strategy is used:
 * pronoun resolution, appositive resolution, role-appositive lookup,
 * predicate-nominative lookup, and finally the generic {@link #resolveOther}.
 * Mentions that end up without a final resolution are reported with reason
 * {@code "null"}.
 *
 * @param d the document whose mentions are resolved; its reference graph is updated
 */
public static void go(Document d) {
    U.pl("\n*** Resolve ***\n");
    Sentence lastSentence = null;
    for (Mention m : d.mentions()) {
        // Print a sentence header whenever we cross into a new sentence.
        Sentence sentence = m.getSentence();
        if (sentence != lastSentence) {
            lastSentence = sentence;
            U.pf("\n== S%-2s %s\n", sentence.ID(), sentence.text());
        }
        U.pl("\n= Resolving\t" + m);

        if (m.node() == null) {
            U.pl("No parse node, skipping");
            continue;
        }

        // Strategy dispatch. (Relative-pronoun resolution is currently disabled.)
        if (Types.isPronominal(m)) {
            resolvePronoun(m, d);
        } else if (inAppositiveConstruction(m)) {
            resolveAppositive(m, d);
        } else {
            Mention roleAntecedent = findAntecedentInRoleAppositiveConstruction(m, d);
            if (roleAntecedent != null) {
                d.refGraph().setRef(m, roleAntecedent);
                reportResolution("role-appos", m, roleAntecedent);
            } else {
                Mention predNomAntecedent = findAntecendentInPredicateNominativeConstruction(m, d);
                if (predNomAntecedent != null) {
                    d.refGraph().setRef(m, predNomAntecedent);
                    reportResolution("pred-nom", m, predNomAntecedent);
                } else {
                    resolveOther(m, d);
                }
            }
        }

        // Nothing more to report if the mention got a final resolution.
        if (d.refGraph().getFinalResolutions().get(m) != null) {
            continue;
        }
        boolean hadAChance = isThereAGoldAntecedent(d, m);
        reportResolution("null", m, null, hadAChance);
        boolean listGoldCandidates = hadAChance && m.aceMention != null && !Types.isPronominal(m);
        if (listGoldCandidates) {
            U.pf("%s gold antecedent candidates:\n", m.aceMention);
            printGoldAntecedents(d, m);
        }
    }
}
/**
 * Reports a resolution decision, treating the mention as one that had a chance
 * of being resolved correctly.
 *
 * @param reason  short tag naming the strategy that produced the decision
 * @param mention the mention that was resolved
 * @param ref     the chosen antecedent, or {@code null} if none
 */
public static void reportResolution(String reason, Mention mention, Mention ref) {
    final boolean hadAChance = true;
    reportResolution(reason, mention, ref, hadAChance);
}
/**
 * Prints one line describing a resolution decision, in a rigid format meant to
 * be grepped for statistics.
 *
 * <p>When gold (ACE) annotations are available the line is prefixed with a verdict:
 * {@code "RIGHT "} if the decision agrees with gold, otherwise {@code "WRONG "}
 * or, when {@code hadAChance} is false, {@code "NOCHANCE "}. Without usable gold
 * annotations the prefix is empty.
 *
 * @param reason     short tag naming the strategy that produced the decision
 * @param mention    the mention that was resolved
 * @param ref        the chosen antecedent, or {@code null} if none
 * @param hadAChance whether a correct resolution was possible for this mention
 */
public static void reportResolution(String reason, Mention mention, Mention ref, boolean hadAChance) {
    final String eval;
    if (mention.aceMention == null) {
        // No gold annotation for the mention: nothing to evaluate against.
        eval = "";
    } else if (ref == null) {
        // A null decision is right exactly when gold says the mention is a singleton.
        if (mention.aceMention.isSingleton()) {
            eval = "RIGHT ";
        } else {
            eval = hadAChance ? "WRONG " : "NOCHANCE ";
        }
    } else if (ref.aceMention != null) {
        if (mention.aceMention.entity == ref.aceMention.entity) {
            eval = "RIGHT ";
        } else {
            eval = hadAChance ? "WRONG " : "NOCHANCE ";
        }
    } else {
        // The antecedent has no gold annotation: cannot be judged.
        eval = "";
    }

    if (ref != null) {
        U.pf("%sresolved %-15s: M%-2d -> M%-2d %20s -> %-20s\n",
                eval, reason, mention.ID(), ref.ID(),
                AnalysisUtilities.abbrevTree(mention.node()),
                AnalysisUtilities.abbrevTree(ref.node()));
        return;
    }
    U.pf("%sresolved %-15s: M%-2d %20s\n",
            eval, reason, mention.ID(), AnalysisUtilities.abbrevTree(mention.node()));
}
/**
 * Prints, on a single line, the gold (ACE) mention of every earlier mention that
 * belongs to the same gold entity as {@code m}. The line is always terminated
 * with a newline, even when nothing is listed.
 *
 * <p>If {@code m} has no gold annotation there is nothing to compare against, so
 * only the terminating newline is printed (previously this threw a
 * {@link NullPointerException}).
 *
 * @param d the document providing the mentions that precede {@code m}
 * @param m the mention whose gold antecedents are listed
 */
public static void printGoldAntecedents(Document d, Mention m) {
    if (m.aceMention != null) {
        for (Mention ant : d.prevMentions(m)) {
            if (ant.aceMention != null
                    && ant.aceMention.entity == m.aceMention.entity) {
                U.pf("%s, ", ant.aceMention);
            }
        }
    }
    U.pf("\n");
}
/**
 * Tells whether some earlier mention shares a gold (ACE) entity with {@code m}.
 *
 * @param d the document providing the mentions that precede {@code m}
 * @param m the mention being checked
 * @return {@code true} if an earlier mention has the same gold entity, or if
 *         {@code m} itself has no gold annotation (an odd case, answered
 *         optimistically); {@code false} otherwise
 */
public static boolean isThereAGoldAntecedent(Document d, Mention m) {
    if (m.aceMention == null) {
        return true; // weird: no gold annotation for the mention itself
    }
    for (Mention earlier : d.prevMentions(m)) {
        if (earlier.aceMention == null) {
            continue;
        }
        if (earlier.aceMention.entity == m.aceMention.entity) {
            return true;
        }
    }
    return false;
}
/*
private static void resolveRelativePronoun(Mention mention, Document d) {
Tree root = mention.getSentence().rootNode();
Tree node = mention.node();
Tree parent = node.parent(root);
parent = SyntacticPaths.getMaximalProjection(parent, root);
for (Mention cand : d.prevMentions(mention)) {
if(cand.node() == parent){
d.refGraph().setRef(mention, cand);
break;
}
}
Mention ref = d.refGraph().getFinalResolutions().get(mention);
if(ref != null){
U.pf("resolved relative pronouns M%-2d -> M%-2d %20s -> %-20s\n",
mention.ID(), ref.ID(), AnalysisUtilities.abbrevTree(mention.node()),
AnalysisUtilities.abbrevTree(ref.node()));
}
}
private static boolean isRelativePronoun(Mention m) {
if (m.node()==null) return false;
TregexMatcher matcher = TregexPatternFactory.getPattern("__ <<# WDT|IN|WRB|WP !> __").matcher(m.node());
return matcher.find();
}
*/
/**
 * Looks for an antecedent of {@code m} in a role-appositive construction: the
 * mention is a non-final child of an NP whose head is a proper noun (NNP) that
 * follows the mention. The antecedent is the mention attached to the maximal
 * projection of that head, and it is only accepted when it denotes a person.
 *
 * <p>Note: This is slightly different from what is described in H&amp;K EMNLP 09.
 * I think the head rules they used were slightly different (or possibly their
 * description is a little off).
 *
 * @param m the mention to resolve; its parse node must not be {@code null}
 * @param d the document whose mentions are searched for the antecedent
 * @return the person mention acting as antecedent, or {@code null} if the mention
 *         is not in such a construction
 */
private static Mention findAntecedentInRoleAppositiveConstruction(Mention m, Document d) {
    Tree root = m.getSentence().rootNode();
    Tree node = m.node();
    Tree parent = node.parent(root);

    // Tree.parent(root) yields null when the node is the root itself or is not
    // contained in the tree; there is no enclosing NP to inspect in that case.
    if (parent == null) {
        return null;
    }
    if (!parent.label().value().equals("NP")) {
        return null;
    }

    // The mention must be followed by at least one sibling (the head noun side).
    int index = parent.indexOf(node);
    if (index + 1 >= parent.numChildren()) {
        return null;
    }

    TregexPattern pat = TregexPatternFactory.getPattern("NP=parent !> __ <<# (NNP=head ,, NP=mention)");
    TregexMatcher matcher = pat.matcher(parent);
    while (matcher.find()) {
        if (matcher.getNode("mention") != node) {
            continue;
        }
        Tree head = matcher.getNode("head");
        // find maximal projection of the head of the parent
        Tree maxProj = SyntacticPaths.getMaximalProjection(head, root);
        // find the mention for the parent
        for (Mention cand : d.mentions()) {
            if (cand.node() == maxProj) {
                if (Types.personhood(cand) == Types.Personhood.Person) {
                    return cand;
                }
                // Only the first mention on that node is considered.
                break;
            }
        }
    }
    return null;
}
/**
 * Finds the antecedent of {@code m} when it is the predicate nominal of a
 * copular clause ("NP1 is/was/... NP2" with {@code m} as NP2).
 *
 * <p>The way this method is called could be made more efficient: it does not
 * really need to run for every mention.
 *
 * @param m the mention to resolve
 * @param d the document whose mentions are searched for the subject NP
 * @return the mention attached to the subject NP, or {@code null} if {@code m}
 *         is not in such a construction or the subject NP has no mention
 */
private static Mention findAntecendentInPredicateNominativeConstruction(Mention m, Document d) {
    final Tree sentenceRoot = m.getSentence().rootNode();
    final Tree target = m.node();
    final TregexMatcher copula = TregexPatternFactory
            .getPattern("S < NP=np1 <+(VP) (VP < (/^VB.*/ < be|is|was|were|are|being|been) < NP=np2)")
            .matcher(sentenceRoot);

    while (copula.find()) {
        if (copula.getNode("np2") != target) {
            continue;
        }
        final Tree subject = copula.getNode("np1");
        for (Mention other : d.mentions()) {
            if (other.node() == subject) {
                return other;
            }
        }
    }
    return null;
}
/**
 * Returns true when {@code m} is the third child of a parent that expands as
 * {@code NP -> NP , NP .*} and that parent contains no coordinating
 * conjunction (CC) among its children.
 *
 * @param m the mention to test
 * @return {@code true} if the mention sits in the appositive slot; {@code false}
 *         otherwise, including when the mention has no parse node or no parent
 */
private static boolean inAppositiveConstruction(Mention m) {
    if (m.node() == null) {
        return false;
    }
    Tree root = m.getSentence().rootNode();
    Tree node = m.node();
    Tree parent = node.parent(root);

    // Tree.parent(root) yields null when the node is the root itself or is not
    // contained in the tree; such a mention cannot be an appositive.
    if (parent == null) {
        return false;
    }
    if (parent.numChildren() < 3) {
        return false;
    }
    if (!parent.getChild(0).label().value().equals("NP")) {
        return false;
    }
    if (!parent.getChild(1).label().value().equals(",")) {
        return false;
    }
    if (parent.indexOf(node) != 2) {
        return false;
    }

    // check to make sure this isn't a conjunction
    for (Tree sibling : parent.getChildrenAsList()) {
        if (sibling.label().value().equals("CC")) {
            return false;
        }
    }
    return true;
}
/**
 * Resolves an appositive mention to the earlier mention attached to its parent
 * node, records the link in the document's reference graph and reports it with
 * reason {@code "appos"}. Does nothing if no earlier mention sits on the parent.
 *
 * <p>A mention without a parse node, or whose node has no parent under the
 * sentence root, is left unresolved. Without this check a {@code null} parent
 * would compare equal to the {@code null} node of any earlier unparsed mention
 * and create a bogus link.
 *
 * @param mention the appositive mention to resolve
 * @param d       the document providing earlier mentions and the reference graph
 */
public static void resolveAppositive(Mention mention, Document d) {
    Tree node = mention.node();
    if (node == null) {
        return;
    }
    Tree root = mention.getSentence().rootNode();
    Tree parent = node.parent(root);
    if (parent == null) {
        return;
    }
    for (Mention cand : d.prevMentions(mention)) {
        if (cand.node() == parent) {
            d.refGraph().setRef(mention, cand);
            reportResolution("appos", mention, cand);
            break;
        }
    }
}
/**
 * Resolves a pronominal mention against the mentions that precede it.
 *
 * <p>An earlier mention is a legal candidate when it passes
 * {@code Types.checkPronominalMatch}, has a parse node, and violates none of the
 * syntactic constraints (I-within-I, subject/object for non-reflexives, adjunct).
 * With no candidate the mention is marked as having a null reference; with one
 * candidate it is linked directly; with several, the one chosen by
 * {@code SyntacticPaths.findBestCandidateByShortestPath} is used. A resulting
 * link is reported with reason {@code "pronoun"}.
 *
 * @param mention the pronoun to resolve
 * @param d       the document providing earlier mentions and the reference graph
 */
public static void resolvePronoun(Mention mention, Document d) {
    U.pl("trying to resolve as a pronoun");
    ArrayList<Mention> candidates = new ArrayList<Mention>();

    for (Mention cand : d.prevMentions(mention)) {
        boolean match = Types.checkPronominalMatch(mention, cand);
        if (cand.node() == null) {
            match = false;
        } else if (SyntacticPaths.aIsDominatedByB(mention, cand)) {
            // I-within-I constraint
            match = false;
        } else if (!Types.isReflexive(mention) && SyntacticPaths.inSubjectObjectRelationship(cand, mention)) {
            // a non-reflexive pronoun cannot corefer with its own subject/object
            match = false;
        } else if (SyntacticPaths.isSubjectAndMentionInAdjunctPhrase(mention, cand)) {
            // adjunct constraint
            match = false;
        }

        // A surviving match always has a parse node (checked first above).
        if (match) {
            candidates.add(cand);
        }
    }

    if (candidates.isEmpty()) {
        U.pl("No legal candidates");
        d.refGraph().setNullRef(mention);
    } else if (candidates.size() == 1) {
        U.pl("Single legal resolution");
        d.refGraph().setRef(mention, candidates.get(0));
    } else {
        U.pl("Finding pronoun antecedent by shortest syntactic path");
        d.refGraph().setRef(mention, SyntacticPaths.findBestCandidateByShortestPath(mention, candidates, d));
    }

    Mention ref = d.refGraph().getFinalResolutions().get(mention);
    if (ref != null) {
        reportResolution("pronoun", mention, ref);
    }
}
/**
 * Resolves a non-pronominal mention that no specialised strategy handled.
 *
 * <p>Every earlier mention accepted by
 * {@link #isCompatibleNominalAntecedent} is a candidate. With no candidate the
 * mention is marked as having a null reference; with one candidate it is linked
 * directly; with several, the one chosen by
 * {@code SyntacticPaths.findBestCandidateByShortestPath} is used. A resulting
 * link is reported with reason {@code "other"}.
 *
 * @param mention the mention to resolve
 * @param d       the document providing earlier mentions and the reference graph
 */
public static void resolveOther(Mention mention, Document d) {
    //TODO SEMANTICS!
    ArrayList<Mention> candidates = new ArrayList<Mention>();
    boolean haveSemInfo = false; // Sem.haveNP(mention) -- semantic lookup is currently switched off

    for (Mention cand : d.prevMentions(mention)) {
        if (isCompatibleNominalAntecedent(mention, cand, haveSemInfo)) {
            candidates.add(cand);
        }
    }

    if (candidates.isEmpty()) {
        U.pl("No legal candidates");
        d.refGraph().setNullRef(mention);
    } else if (candidates.size() == 1) {
        U.pl("Single legal resolution");
        d.refGraph().setRef(mention, candidates.get(0));
    } else {
        U.pl("Finding antecedent by shortest syntactic path");
        d.refGraph().setRef(mention, SyntacticPaths.findBestCandidateByShortestPath(mention, candidates, d));
    }

    Mention ref = d.refGraph().getFinalResolutions().get(mention);
    if (ref != null) {
        reportResolution("other", mention, ref);
    }
}

/**
 * Decides whether {@code cand} is an acceptable antecedent for the nominal
 * {@code mention}. The checks run in order and the first one that applies wins:
 * hard rejections (no parse node, pronominal candidate, syntactic constraints),
 * then head-word / prefix match, then the gold oracle if enabled, then semantic
 * compatibility if available; anything left is rejected.
 */
private static boolean isCompatibleNominalAntecedent(Mention mention, Mention cand, boolean haveSemInfo) {
    if (cand.node() == null) {
        return false;
    }
    if (Types.isPronominal(cand)) {
        // we only do pronoun-nominal matching in the other direction
        return false;
    }
    if (SyntacticPaths.aIsDominatedByB(mention, cand)) {
        // I-within-I constraint
        return false;
    }
    if (SyntacticPaths.inSubjectObjectRelationship(cand, mention)) {
        // subj-obj constraint
        return false;
    }
    if (SyntacticPaths.isSubjectAndMentionInAdjunctPhrase(mention, cand)) {
        // adjunct constraint
        return false;
    }
    if (mention.hasSameHeadWord(cand) || substringMatch(mention, cand)) {
        return true;
    }
    if (Opts.oracleSemantics) {
        // The oracle needs gold annotations on both sides; without them it cannot
        // vouch for the pair, so reject instead of failing on a null dereference.
        if (mention.aceMention == null || cand.aceMention == null) {
            return false;
        }
        boolean sameEntity = mention.aceMention.entity == cand.aceMention.entity;
        U.pf("SEMANTICS ORACLE %-5s\t%s\t%s\n",
                sameEntity ? "MATCH" : "DIFF",
                Strings.normalizeWhitespace(mention.aceMention.head.charseq.text),
                Strings.normalizeWhitespace(cand.aceMention.head.charseq.text));
        return sameEntity;
    }
    if (haveSemInfo && Sem.haveNP(cand)) {
        return Sem.areCompatible(mention, cand);
    }
    // Defaulting to reject
    return false;
}
/**
 * Loose string match between two mentions: true when at least one of the two
 * head pre-terminals is tagged as a proper noun (label starting with "NNP") and
 * the two head words share their first four characters.
 *
 * <p>Alternatives tried earlier and abandoned: a five-character prefix, plain
 * containment of one head word in the other, and a Levenshtein distance below 5.
 *
 * <p>NOTE(review): the original comment on the tag check read "both must be
 * proper nouns", but the condition only rejects a pair when <em>neither</em>
 * head is NNP. The existing behaviour is kept here -- confirm which one is
 * intended before tightening it.
 *
 * @param mention the mention being resolved; its parse node must not be null
 * @param cand    the candidate antecedent; its parse node must not be null
 * @return {@code true} if the pair passes the proper-noun gate and the prefix test
 */
private static boolean substringMatch(Mention mention, Mention cand) {
    String mHead = mention.getHeadWord();
    String cHead = cand.getHeadWord();

    HeadFinder headFinder = AnalysisUtilities.getInstance().getHeadFinder();

    // Proper-noun gate: give up only when neither head is tagged NNP*.
    boolean mentionIsProper =
            mention.node().headPreTerminal(headFinder).label().toString().startsWith("NNP");
    if (!mentionIsProper
            && !cand.node().headPreTerminal(headFinder).label().toString().startsWith("NNP")) {
        return false;
    }

    // Prefix test: both head words must be long enough and agree on the prefix.
    final int matchLen = 4;
    if (mHead.length() >= matchLen && cHead.length() >= matchLen) {
        return mHead.regionMatches(0, cHead, 0, matchLen);
    }
    return false;
}
}