Resolve.java example

Explorer
arkref-master
- src
  - arkref
package arkref.analysis;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import arkref.analysis.ARKref.Opts;
import arkref.data.Document;
import arkref.data.Mention;
import arkref.data.Sentence;
import arkref.parsestuff.AnalysisUtilities;
import arkref.parsestuff.TregexPatternFactory;
import arkref.parsestuff.U;

import com.aliasi.util.Strings;

import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;

public class Resolve {
	/**
	 * Resolves every mention of the document, in the order given by
	 * {@code d.mentions()}, and prints a trace of each decision.
	 *
	 * Mentions without a parse node are skipped. Every other mention is handed
	 * to the first strategy that applies, in this order: pronoun, appositive,
	 * role appositive, predicate nominative, and finally the generic
	 * {@link #resolveOther(Mention, Document)}.
	 *
	 * A mention that still has no final resolution afterwards is reported with
	 * reason "null"; for non-pronominal mentions that have gold (ACE)
	 * annotation and a gold antecedent, the gold antecedents are printed too.
	 *
	 * @param d the document whose mentions are resolved; its reference graph is updated
	 */
	public static void go(Document d) {
		U.pl("\n***  Resolve ***\n");
		Sentence lastSentence = null;
		for (Mention m : d.mentions()) {
			// Print a sentence header whenever we move on to a new sentence.
			Sentence here = m.getSentence();
			if (here != lastSentence) {
				lastSentence = here;
				U.pf("\n== S%-2s  %s\n", here.ID(), here.text());
			}
			U.pl("\n= Resolving\t" + m);
			if (m.node()==null) {
				U.pl("No parse node, skipping");
				continue;
			}

			// (Relative-pronoun resolution is currently disabled.)
			if (Types.isPronominal(m)) {
				resolvePronoun(m, d);
			} else if (inAppositiveConstruction(m)) {
				resolveAppositive(m, d);
			} else {
				// Try the two purely syntactic constructions before the generic fallback.
				String reason = "role-appos";
				Mention antecedent = findAntecedentInRoleAppositiveConstruction(m, d);
				if (antecedent == null) {
					reason = "pred-nom";
					antecedent = findAntecendentInPredicateNominativeConstruction(m, d);
				}
				if (antecedent != null) {
					d.refGraph().setRef(m, antecedent);
					reportResolution(reason, m, antecedent);
				} else {
					resolveOther(m, d);
				}
			}

			if (d.refGraph().getFinalResolutions().get(m) != null) {
				continue;
			}
			// Nothing was chosen: report it, noting whether a gold antecedent existed at all.
			boolean hadAChance = isThereAGoldAntecedent(d, m);
			reportResolution("null", m, null, hadAChance);
			boolean showGold = hadAChance && m.aceMention!=null && !Types.isPronominal(m);
			if (showGold) {
				U.pf("%s   gold antecedent candidates:\n", m.aceMention);
				printGoldAntecedents(d,m);
			}
		}
	}
	
	/**
	 * Reports a resolution decision, treating the mention as having had a
	 * chance of being resolved correctly.
	 *
	 * @param reason  short label of the strategy that produced the decision
	 * @param mention the mention that was resolved
	 * @param ref     the chosen antecedent, or null if none was chosen
	 */
	public static void reportResolution(String reason, Mention mention, Mention ref) {
		final boolean hadAChance = true;
		reportResolution(reason, mention, ref, hadAChance);
	}

	/**
	 * Prints one line describing a resolution decision. The line layout is
	 * deliberately rigid so the output can be analysed with grep.
	 *
	 * When gold (ACE) annotation is available the line is prefixed with
	 * "RIGHT ", "WRONG " or "NOCHANCE ": with no antecedent chosen, the
	 * decision is right if the gold mention is a singleton; with an antecedent
	 * that also has gold annotation, it is right if both belong to the same
	 * gold entity. A decision that is not right is "WRONG " when
	 * {@code hadAChance} is true and "NOCHANCE " otherwise. Without usable gold
	 * annotation the prefix is empty.
	 *
	 * @param reason     short label of the strategy that produced the decision
	 * @param mention    the mention that was resolved
	 * @param ref        the chosen antecedent, or null if none was chosen
	 * @param hadAChance whether a correct decision was possible for this mention
	 */
	public static void reportResolution(String reason, Mention mention, Mention ref, boolean hadAChance) {
		String missLabel = hadAChance ? "WRONG " : "NOCHANCE ";
		String eval = "";
		if (mention.aceMention != null) {
			if (ref == null) {
				eval = mention.aceMention.isSingleton() ? "RIGHT " : missLabel;
			} else if (ref.aceMention != null) {
				eval = mention.aceMention.entity == ref.aceMention.entity ? "RIGHT " : missLabel;
			}
		}

		if (ref == null) {
			U.pf("%sresolved %-15s: M%-2d           %20s\n",
					eval, reason, mention.ID(), AnalysisUtilities.abbrevTree(mention.node()));
			return;
		}
		U.pf("%sresolved %-15s: M%-2d -> M%-2d    %20s    ->   %-20s\n",
				eval, reason, mention.ID(), ref.ID(),
				AnalysisUtilities.abbrevTree(mention.node()),
				AnalysisUtilities.abbrevTree(ref.node()));
	}
	/**
	 * Prints, on one line, the gold (ACE) mention of every earlier mention that
	 * belongs to the same gold entity as {@code m}, each followed by ",  ".
	 * The line is always terminated with a newline.
	 *
	 * If {@code m} has no gold annotation there is nothing to compare against,
	 * so only the newline is printed (instead of failing with a
	 * NullPointerException).
	 *
	 * @param d the document containing {@code m}
	 * @param m the mention whose gold antecedents are listed
	 */
	public static void printGoldAntecedents(Document d, Mention m) {
		if (m.aceMention != null) {
			for (Mention ant : d.prevMentions(m)) {
				if (ant.aceMention != null
						&& ant.aceMention.entity == m.aceMention.entity) {
					U.pf("%s,  ", ant.aceMention);
				}
			}
		}
		U.pf("\n");
	}
	/**
	 * Tells whether some earlier mention shares the gold (ACE) entity of
	 * {@code m}.
	 *
	 * @param d the document containing {@code m}
	 * @param m the mention to check
	 * @return true if an earlier mention with gold annotation has the same gold
	 *         entity as {@code m}; also true when {@code m} itself has no gold
	 *         annotation (an odd case, answered optimistically); false otherwise
	 */
	public static boolean isThereAGoldAntecedent(Document d, Mention m) {
		if (m.aceMention == null) {
			return true; // weird
		}
		for (Mention earlier : d.prevMentions(m)) {
			if (earlier.aceMention == null) {
				continue;
			}
			if (earlier.aceMention.entity == m.aceMention.entity) {
				return true;
			}
		}
		return false;
	}
	
	

	/*
	private static void resolveRelativePronoun(Mention mention, Document d) {
		Tree root = mention.getSentence().rootNode();
		Tree node = mention.node();
		Tree parent = node.parent(root);
		
		parent = SyntacticPaths.getMaximalProjection(parent, root);
		
		for (Mention cand : d.prevMentions(mention)) {
			if(cand.node() == parent){
				d.refGraph().setRef(mention, cand);
				break;
			}
		}
		
		Mention ref = d.refGraph().getFinalResolutions().get(mention);
		if(ref != null){
			U.pf("resolved relative pronouns M%-2d -> M%-2d    %20s    ->   %-20s\n", 
					mention.ID(), ref.ID(), AnalysisUtilities.abbrevTree(mention.node()),
					 AnalysisUtilities.abbrevTree(ref.node()));
		}
	}


	private static boolean isRelativePronoun(Mention m) {
		if (m.node()==null) return false;
		TregexMatcher matcher = TregexPatternFactory.getPattern("__ <<# WDT|IN|WRB|WP !> __").matcher(m.node());
		return matcher.find();
	}
	 */

	/**
	 * Looks for an antecedent of {@code m} in a role-appositive construction.
	 *
	 * The mention's node must be a non-final child of an NP. That parent NP is
	 * matched against a Tregex pattern binding an NP as "mention" and an NNP
	 * as "head"; when the bound "mention" is this mention's node, the maximal
	 * projection of the bound head is looked up among the document's mentions.
	 * That mention is returned only if {@link Types#personhood} classifies it
	 * as a person.
	 *
	 * Note: This is slightly different than what is described in H&K EMNLP 09.
	 * I think the head rules they used were slightly different (or possibly their description is a little off).
	 *
	 * @param m the mention to resolve; its parse node must not be null
	 * @param d the document whose mentions are searched
	 * @return the antecedent mention, or null if the construction does not apply
	 */
	private static Mention findAntecedentInRoleAppositiveConstruction(Mention m, Document d) {
		Tree root = m.getSentence().rootNode();
		Tree node = m.node();
		Tree parent = node.parent(root);

		// A node with no parent (e.g. the sentence root itself) cannot be inside an NP.
		if (parent == null || !parent.label().value().equals("NP")) {
			return null;
		}

		// The mention must be followed by at least one sibling.
		int index = parent.indexOf(node);
		if (index + 1 >= parent.numChildren()) {
			return null;
		}

		TregexPattern pat = TregexPatternFactory.getPattern("NP=parent !> __ <<# (NNP=head ,, NP=mention)");
		TregexMatcher matcher = pat.matcher(parent);
		while (matcher.find()) {
			if (matcher.getNode("mention") != node) {
				continue;
			}
			Tree head = matcher.getNode("head");

			// find maximal projection of the head of the parent
			Tree maxProj = SyntacticPaths.getMaximalProjection(head, root);

			// find the mention for the parent
			for (Mention cand : d.mentions()) {
				if (cand.node() == maxProj) {
					if (Types.personhood(cand) == Types.Personhood.Person) {
						return cand;
					}
					// Right node but not a person: stop scanning for this match.
					break;
				}
			}
		}

		return null;
	}

	/**
	 * Looks for an antecedent of {@code m} in a predicate-nominative
	 * construction.
	 *
	 * The sentence tree is searched with a Tregex pattern that binds a subject
	 * NP ("np1") and an NP ("np2") under a VP headed by a form of "be". When
	 * "np2" is this mention's node, the mention whose node is "np1" is the
	 * antecedent.
	 *
	 * The way this method is called could be made more efficient.
	 * It doesn't really need to get called for every mention.
	 *
	 * @param m the mention to resolve
	 * @param d the document whose mentions are searched
	 * @return the mention attached to the subject NP, or null if there is none
	 */
	private static Mention findAntecendentInPredicateNominativeConstruction(Mention m, Document d) {
		Tree sentenceRoot = m.getSentence().rootNode();
		Tree target = m.node();

		TregexMatcher matcher = TregexPatternFactory
				.getPattern("S < NP=np1 <+(VP) (VP < (/^VB.*/ < be|is|was|were|are|being|been) < NP=np2)")
				.matcher(sentenceRoot);
		while (matcher.find()) {
			if (matcher.getNode("np2") != target) {
				continue;
			}
			Tree subject = matcher.getNode("np1");
			for (Mention other : d.mentions()) {
				if (other.node() == subject) {
					return other;
				}
			}
		}

		return null;
	}

	
	/**
	 * Returns true when m is the third child of a parent that expands as
	 * NP -> NP , NP .*
	 * and none of the parent's children is a conjunction (CC).
	 *
	 * Returns false when the mention has no parse node or its node has no
	 * parent in the sentence tree.
	 *
	 * @param m the mention to test
	 * @return whether m sits in the appositive slot of such an expansion
	 */
	private static boolean inAppositiveConstruction(Mention m) {
		Tree node = m.node();
		if (node == null) {
			return false;
		}
		Tree parent = node.parent(m.getSentence().rootNode());
		if (parent == null) {
			// e.g. the node is the sentence root: no enclosing constituent at all
			return false;
		}

		if (parent.numChildren() < 3) {
			return false;
		}
		if (!parent.getChild(0).label().value().equals("NP")) {
			return false;
		}
		if (!parent.getChild(1).label().value().equals(",")) {
			return false;
		}
		if (parent.indexOf(node) != 2) {
			return false;
		}

		//check to make sure this isn't a conjunction
		for (Tree sibling : parent.getChildrenAsList()) {
			if (sibling.label().value().equals("CC")) {
				return false;
			}
		}

		return true;
	}

	
	/**
	 * Resolves an appositive mention to the first earlier mention whose parse
	 * node is the parent of this mention's node, records the link in the
	 * document's reference graph and reports it with reason "appos".
	 *
	 * Does nothing when the mention has no parse node, when its node has no
	 * parent in the sentence tree, or when no earlier mention sits on the
	 * parent node.
	 *
	 * @param mention the appositive mention to resolve
	 * @param d       the document containing the mention
	 */
	public static void resolveAppositive(Mention mention, Document d) {
		Tree node = mention.node();
		if (node == null) {
			return;
		}
		Tree parent = node.parent(mention.getSentence().rootNode());
		if (parent == null) {
			// Without this guard, a null parent would compare equal to the
			// (null) node of any earlier mention that has no parse node.
			return;
		}

		for (Mention cand : d.prevMentions(mention)) {
			if (cand.node() == parent) {
				d.refGraph().setRef(mention, cand);
				reportResolution("appos", mention, cand);
				break;
			}
		}
	}
	
	/**
	 * Resolves a pronominal mention against the earlier mentions of the
	 * document.
	 *
	 * An earlier mention is a legal candidate when
	 * {@link Types#checkPronominalMatch} accepts it, it has a parse node, and
	 * none of the syntactic filters rejects it: the mention being dominated by
	 * the candidate (I-within-I), a subject-object relationship with a
	 * non-reflexive pronoun, or the adjunct-phrase constraint.
	 *
	 * With no candidate a null reference is recorded; with one, it is chosen;
	 * with several, the choice is delegated to
	 * {@link SyntacticPaths#findBestCandidateByShortestPath}. A non-null final
	 * resolution is reported with reason "pronoun".
	 *
	 * @param mention the pronoun to resolve
	 * @param d       the document containing the mention
	 */
	public static void resolvePronoun(Mention mention, Document d) {
		U.pl("trying to resolve as a pronoun");

		ArrayList<Mention> candidates = new ArrayList<Mention>();

		for (Mention cand : d.prevMentions(mention)) {
			boolean match = Types.checkPronominalMatch(mention, cand);

			if (cand.node() == null) {
				match = false;
			} else if (SyntacticPaths.aIsDominatedByB(mention, cand)) {
				// I-within-I constraint
				match = false;
			} else if (!Types.isReflexive(mention) && SyntacticPaths.inSubjectObjectRelationship(cand, mention)) {
				// a non-reflexive pronoun may not corefer with its own subject/object
				match = false;
			} else if (SyntacticPaths.isSubjectAndMentionInAdjunctPhrase(mention, cand)) {
				// adjunct constraint
				match = false;
			}

			// A match implies a non-null node (see the first filter above).
			if (match) {
				candidates.add(cand);
			}
		}

		if (candidates.isEmpty()) {
			U.pl("No legal candidates");
			d.refGraph().setNullRef(mention);
		} else if (candidates.size() == 1) {
			U.pl("Single legal resolution");
			d.refGraph().setRef(mention, candidates.get(0));
		} else {
			U.pl("Finding pronoun antecedent by shortest syntactic path");
			d.refGraph().setRef(mention, SyntacticPaths.findBestCandidateByShortestPath(mention, candidates, d));
		}

		Mention ref = d.refGraph().getFinalResolutions().get(mention);
		if (ref != null) {
			reportResolution("pronoun", mention, ref);
		}
	}



	/**
	 * Resolves a non-pronominal mention that no syntactic construction
	 * handled.
	 *
	 * Every earlier mention accepted by
	 * {@link #isAcceptableAntecedent(Mention, Mention, boolean)} becomes a
	 * candidate. With no candidate a null reference is recorded; with one, it
	 * is chosen; with several, the choice is delegated to
	 * {@link SyntacticPaths#findBestCandidateByShortestPath}. A non-null final
	 * resolution is reported with reason "other".
	 *
	 * @param mention the mention to resolve
	 * @param d       the document containing the mention
	 */
	public static void resolveOther(Mention mention, Document d) {
		//TODO SEMANTICS!

		ArrayList<Mention> candidates = new ArrayList<Mention>();

		// Semantic lookup for the mention is switched off for now.
		boolean haveSemInfo = false;//Sem.haveNP(mention);

		for (Mention cand : d.prevMentions(mention)) {
			if (isAcceptableAntecedent(mention, cand, haveSemInfo)) {
				candidates.add(cand);
			}
		}

		if (candidates.isEmpty()) {
			U.pl("No legal candidates");
			d.refGraph().setNullRef(mention);
		} else if (candidates.size() == 1) {
			U.pl("Single legal resolution");
			d.refGraph().setRef(mention, candidates.get(0));
		} else {
			U.pl("Finding antecedent by shortest syntactic path");
			d.refGraph().setRef(mention, SyntacticPaths.findBestCandidateByShortestPath(mention, candidates, d));
		}

		Mention ref = d.refGraph().getFinalResolutions().get(mention);
		if (ref != null) {
			reportResolution("other", mention, ref);
		}

		//semantics!
	}

	/**
	 * Decides whether {@code cand} may serve as antecedent of the
	 * non-pronominal {@code mention}. The checks run in order and the first
	 * one that applies settles the answer.
	 *
	 * @param mention     the mention being resolved
	 * @param cand        an earlier mention considered as antecedent
	 * @param haveSemInfo whether semantic information is available for {@code mention}
	 * @return true if {@code cand} is an acceptable antecedent
	 */
	private static boolean isAcceptableAntecedent(Mention mention, Mention cand, boolean haveSemInfo) {
		if (cand.node() == null) {
			return false;
		}
		if (Types.isPronominal(cand)) {
			// we only do pronoun-nominal matching in the other direction
			return false;
		}
		if (SyntacticPaths.aIsDominatedByB(mention, cand)) {
			// I-within-I constraint
			return false;
		}
		if (SyntacticPaths.inSubjectObjectRelationship(cand, mention)) {
			// subj-obj constraint
			return false;
		}
		if (SyntacticPaths.isSubjectAndMentionInAdjunctPhrase(mention, cand)) {
			// adjunct constraint
			return false;
		}
		if (mention.hasSameHeadWord(cand) || substringMatch(mention, cand)) {
			return true;
		}
		if (Opts.oracleSemantics) {
			// The oracle compares gold entities; without gold annotation on
			// both sides there is nothing to compare, so reject rather than
			// fail with a NullPointerException.
			if (mention.aceMention == null || cand.aceMention == null) {
				return false;
			}
			boolean sameEntity = mention.aceMention.entity == cand.aceMention.entity;
			U.pf("SEMANTICS ORACLE %-5s\t%s\t%s\n",
					sameEntity ? "MATCH" : "DIFF",
					Strings.normalizeWhitespace(mention.aceMention.head.charseq.text),
					Strings.normalizeWhitespace(cand.aceMention.head.charseq.text));
			return sameEntity;
		}
		if (haveSemInfo && Sem.haveNP(cand)) {
			return Sem.areCompatible(mention, cand);
		}

		// Defaulting to reject
		return false;
	}

	
	/**
	 * Loose head-word match between two mentions: true when at least one of
	 * them has a head tagged NNP* and both head words are at least four
	 * characters long and share their first four characters.
	 *
	 * @param mention the mention being resolved; its parse node must not be null
	 * @param cand    the candidate antecedent; its parse node must not be null
	 * @return whether the head words match by common prefix
	 */
	private static boolean substringMatch(Mention mention, Mention cand) {
		String mHead = mention.getHeadWord();
		String cHead = cand.getHeadWord();

		// Reject only when NEITHER head is a proper noun.
		// NOTE(review): the original comment here read "both must be proper
		// nouns", which would need || instead of &&. Behavior is kept as is
		// because changing it alters resolution output -- confirm the intent.
		if (!hasProperNounHead(mention) && !hasProperNounHead(cand)) {
			return false;
		}

		final int matchLen = 4;
		if (mHead.length() < matchLen || cHead.length() < matchLen) {
			return false;
		}
		// Compare the first matchLen characters of both head words.
		return mHead.regionMatches(0, cHead, 0, matchLen);
	}

	/** Tells whether the head preterminal of the mention's parse node carries a label starting with "NNP". */
	private static boolean hasProperNounHead(Mention m) {
		return m.node()
				.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder())
				.label().toString().startsWith("NNP");
	}



	
}