package arkref.data;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import arkref.ace.AceDocument;
import arkref.analysis.Types;
import arkref.parsestuff.AnalysisUtilities;
import arkref.parsestuff.TregexPatternFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
import edu.stanford.nlp.util.Pair;
/**
 * A mention: a parse-tree node within a {@link Sentence}, tagged with an integer id.
 *
 * <p>The {@code node} may be {@code null}; in that case the head-related
 * accessors fall back to placeholder values (see {@link #neType()},
 * {@link #getHeadWord()} and {@link #getHeadNode()}).
 */
public class Mention implements Serializable {
    private static final long serialVersionUID = 3218834840031746390L;

    /** Placeholder returned by {@link #getHeadWord()} when no head node can be determined. */
    private static final String NO_HEAD_WORD = "NO_HEAD_WORD";

    /** Named-entity label that {@link #isName()} treats as "not a name". */
    private static final String NE_OUTSIDE = "O";

    private final Tree node;
    private final Sentence sentence;
    private final int id;

    public AceDocument.Mention aceMention; // for convenience

    /**
     * @param id       identifier of this mention
     * @param sentence sentence this mention belongs to
     * @param node     parse-tree node of this mention; may be {@code null}
     */
    public Mention(int id, Sentence sentence, Tree node) {
        this.id = id;
        this.sentence = sentence;
        this.node = node;
    }

    /**
     * Returns the named-entity type that the sentence reports for this
     * mention's head terminal, or {@code "O"} when the mention has no node.
     */
    public String neType() {
        if (node == null) {
            // TODO wrong!!! can get from Word alignment
            return NE_OUTSIDE;
        }
        // Using the head word strongly outperforms using the right-most leaf.
        // Note: this deliberately uses the head terminal of the original node
        // (not getHeadNode(), which may return a node from a pruned copy).
        Tree head = headTerminalOf(node);
        return sentence.neType(head);
    }

    /** Returns {@code true} when {@link #neType()} is anything other than {@code "O"}. */
    public boolean isName() {
        return !NE_OUTSIDE.equals(neType());
    }

    /**
     * Formats the mention as a single table-like line: id, sentence id,
     * gender / number / personhood (blank when unknown), NE type and the node.
     */
    @Override
    public String toString() {
        String g = safeToString(Types.gender(this));
        String n = safeToString(Types.number(this));
        String p = safeToString(Types.personhood(this));
        return String.format("M%-2d | S%-2d | %3s %2s %4s | %-12s | %s",
                id, sentence.ID(), g, n, p, neType(), node);
    }

    /** Returns {@code o.toString()}, or the empty string when {@code o} is {@code null}. */
    public String safeToString(Object o) {
        if (o == null) {
            return "";
        }
        return o.toString();
    }

    /** Returns the id passed to the constructor. */
    public int ID() {
        return id;
    }

    /** Returns the parse-tree node of this mention; may be {@code null}. */
    public Tree node() {
        return node;
    }

    /**
     * Returns the string form of the head node's yield, or the placeholder
     * {@code "NO_HEAD_WORD"} when {@link #getHeadNode()} returns {@code null}.
     */
    public String getHeadWord() {
        Tree headTerminalNode = getHeadNode();
        if (headTerminalNode == null) {
            // TODO tricky: use the token span alignments and do guesswork if length>1.
            // for now, bailing...
            return NO_HEAD_WORD;
        }
        return headTerminalNode.yield().toString();
    }

    /**
     * Returns the head terminal of this mention's node.
     *
     * <p>If the head terminal is the possessive marker {@code 's}, the POS
     * nodes are pruned from a deep copy of the node and the head terminal of
     * that pruned copy is returned instead; the returned node then belongs to
     * the copy, not to the original tree.
     *
     * @return the head terminal, or {@code null} when the mention has no node
     *         or no head terminal could be determined
     */
    public Tree getHeadNode() {
        if (node == null) {
            // TODO tricky: use the token span alignments and do guesswork if length>1.
            // for now, bailing...
            return null;
        }

        Tree res = headTerminalOf(node);
        if (res == null) {
            // No head could be determined; callers already treat null as "no head".
            return null;
        }

        String yield = res.yield().toString();
        if ("'s".equals(yield)) {
            Tree copy = node.deeperCopy();

            TregexPattern matchPattern = TregexPatternFactory.getPattern("POS=pos");
            List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
            ps.add(Tsurgeon.parseOperation("prune pos"));
            TsurgeonPattern p = Tsurgeon.collectOperations(ps);

            List<Pair<TregexPattern, TsurgeonPattern>> ops =
                    new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
            ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));

            // Use the returned tree: it is the authoritative result and is null
            // when pruning removed the whole tree. In that case keep the
            // original head rather than re-reading a discarded copy.
            Tree pruned = Tsurgeon.processPatternsOnTree(ops, copy);
            if (pruned != null) {
                res = headTerminalOf(pruned);
            }
        }
        return res;
    }

    /** Returns the sentence passed to the constructor. */
    public Sentence getSentence() {
        return sentence;
    }

    /**
     * Returns whether this mention and {@code cand} have the same head word,
     * ignoring case. Mentions without a determinable head word never match,
     * so two head-less mentions are not treated as sharing a head.
     *
     * @param cand the other mention; must not be {@code null}
     */
    public boolean hasSameHeadWord(Mention cand) {
        String head = getHeadWord();
        String candHead = cand.getHeadWord();
        if (NO_HEAD_WORD.equals(head) || NO_HEAD_WORD.equals(candHead)) {
            // The placeholder is not a real word; comparing it would wrongly
            // report a match between two mentions that both lack a head.
            return false;
        }
        return head.equalsIgnoreCase(candHead);
    }

    /** Returns the head terminal of {@code tree} using the shared head finder (may be null). */
    private static Tree headTerminalOf(Tree tree) {
        return tree.headTerminal(AnalysisUtilities.getInstance().getHeadFinder());
    }
}