package edu.stanford.nlp.ie.machinereading.structure;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
/**
* Each entity mention is described by a type (possibly subtype) and a span of text
*
* @author Andrey Gusev
* @author Mihai
*/
public class EntityMention extends ExtractionObject {
private static final long serialVersionUID = -2745903102654191527L;
/** Mention type, if available, e.g., nominal */
private final String mentionType;
private String corefID = "-1";
/**
* Offsets the head span, e.g., "George Bush" in the extent "the president George Bush"
* The offsets are relative to the sentence containing this mention
*/
private Span headTokenSpan;
/**
* Position of the syntactic head word of this mention, e.g., "Bush" for the head span "George Bush"
* The offset is relative the sentence containing this mention
* Note: use headTokenSpan when sequence tagging entity mentions not this.
* This is meant to be used only for event/relation feature extraction!
*/
private int syntacticHeadTokenPosition;
private String normalizedName;
public EntityMention(String objectId,
CoreMap sentence,
Span extentSpan,
Span headSpan,
String type,
String subtype,
String mentionType) {
super(objectId, sentence, extentSpan, type, subtype);
this.mentionType = (mentionType != null ? mentionType.intern() : null);
this.headTokenSpan = headSpan;
this.syntacticHeadTokenPosition = -1;
this.normalizedName = null;
}
public String getCorefID(){
return corefID;
}
public void setCorefID(String id) {
this.corefID = id;
}
public String getMentionType() { return mentionType; }
public Span getHead() { return headTokenSpan; }
public int getHeadTokenStart() {
return headTokenSpan.start();
}
public int getHeadTokenEnd() {
return headTokenSpan.end();
}
public void setHeadTokenSpan(Span s) {
headTokenSpan = s;
}
public void setHeadTokenPosition(int i) {
this.syntacticHeadTokenPosition = i;
}
public int getSyntacticHeadTokenPosition() {
return this.syntacticHeadTokenPosition;
}
public CoreLabel getSyntacticHeadToken() {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
return tokens.get(syntacticHeadTokenPosition);
}
public Tree getSyntacticHeadTree() {
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
return tree.getLeaves().get(syntacticHeadTokenPosition);
}
public String getNormalizedName() { return normalizedName; }
public void setNormalizedName(String n) { normalizedName = n; }
/*
@Override
public boolean equals(Object other) {
if(! (other instanceof EntityMention)) return false;
ExtractionObject o = (ExtractionObject) other;
if(o.objectId.equals(objectId) && o.sentence == sentence) return true;
return false;
}
*/
@Override
public boolean equals(Object other) {
if(! (other instanceof EntityMention)) return false;
EntityMention otherEnt = (EntityMention) other;
return equals(otherEnt, true);
}
public boolean headIncludes(EntityMention otherEnt, boolean useSubType) {
return otherEnt.getSyntacticHeadTokenPosition() >= getHeadTokenStart() &&
otherEnt.getSyntacticHeadTokenPosition() < getHeadTokenEnd() &&
((type != null && otherEnt.type != null && type.equals(otherEnt.type)) || (type == null && otherEnt.type == null)) &&
( ! useSubType || ((subType != null && otherEnt.subType != null && subType.equals(otherEnt.subType)) || (subType == null && otherEnt.subType == null)));
}
public boolean equals(EntityMention otherEnt, boolean useSubType) {
//
// two mentions are equal if they are over the same sentence,
// have the same head span, the same type/subtype, and the same text.
// We need this for scoring NER, and in various places in KBP
//
if(sentence.get(CoreAnnotations.TextAnnotation.class).equals(otherEnt.sentence.get(CoreAnnotations.TextAnnotation.class)) && textEquals(otherEnt) && labelEquals(otherEnt, useSubType)){
return true;
}
/*
if(((headTokenSpan != null && headTokenSpan.equals(otherEnt.headTokenSpan)) ||
(extentTokenSpan != null && extentTokenSpan.equals(otherEnt.extentTokenSpan))) &&
((type != null && otherEnt.type != null && type.equals(otherEnt.type)) || (type == null && otherEnt.type == null)) &&
(! useSubType || ((subType != null && otherEnt.subType != null && subType.equals(otherEnt.subType)) || (subType == null && otherEnt.subType == null))) &&
AnnotationUtils.getTextContent(sentence, headTokenSpan).equals(AnnotationUtils.getTextContent(otherEnt.getSentence(), otherEnt.headTokenSpan))){
return true;
}
*/
return false;
}
/**
* Compares the labels of the two mentions
* @param otherEnt
* @param useSubType
*/
public boolean labelEquals(EntityMention otherEnt, boolean useSubType) {
if(((type != null && otherEnt.type != null && type.equals(otherEnt.type)) || (type == null && otherEnt.type == null)) &&
(! useSubType || ((subType != null && otherEnt.subType != null && subType.equals(otherEnt.subType)) || (subType == null && otherEnt.subType == null)))){
return true;
}
return false;
}
/**
* Compares the text spans of the two entity mentions.
*
* @param otherEnt
*/
public boolean textEquals(EntityMention otherEnt) {
//
// we attempt three comparisons:
// a) if syntactic heads are defined we consider two texts similar if they have the same syntactic head
// (this is necessary because in NFL we compare entities with different spans but same heads, e.g. "49ers" vs "San Francisco 49ers"
// b) if head spans are defined we consider two texts similar if they have the same head span
// c) if extent spans are defined we consider two texts similar if they have the same extent span
//
if(syntacticHeadTokenPosition != -1 && otherEnt.syntacticHeadTokenPosition != -1){
if(syntacticHeadTokenPosition == otherEnt.syntacticHeadTokenPosition) return true;
return false;
}
if(headTokenSpan != null && otherEnt.headTokenSpan != null){
if(headTokenSpan.equals(otherEnt.headTokenSpan)) return true;
return false;
}
if(extentTokenSpan != null && otherEnt.extentTokenSpan != null){
if(extentTokenSpan.equals(otherEnt.extentTokenSpan)) return true;
return false;
}
if (!this.getExtentString().equals(otherEnt.getExtentString())) {
return false;
}
return false;
}
/**
* Get the text value of this entity.
* The headTokenSpan MUST be set before calling this method!
*/
public String getValue() {
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
// int lastEnd = -1;
StringBuilder sb = new StringBuilder();
for (int i = headTokenSpan.start(); i < headTokenSpan.end(); i ++){
CoreLabel token = tokens.get(i);
// we are not guaranteed to have CharacterOffsets so we can't use them...
/*
Integer start = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
Integer end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
if (start != null && end != null) {
if (lastEnd != -1 && !start.equals(lastEnd)) {
sb.append(StringUtils.repeat(" ", start - lastEnd));
lastEnd = end;
}
} else {
if (lastEnd != -1) sb.append(" ");
lastEnd = 0;
}
*/
if(i > headTokenSpan.start()) sb.append(" ");
sb.append(token.word());
}
return sb.toString();
}
@Override
public String toString() {
return "EntityMention [type=" + type
+ (subType != null ? ", subType=" + subType : "")
+ (mentionType != null ? ", mentionType=" + mentionType : "")
+ (objectId != null ? ", objectId=" + objectId : "")
+ (headTokenSpan != null ? ", hstart=" + headTokenSpan.start() + ", hend=" + headTokenSpan.end() : "")
+ (extentTokenSpan != null ? ", estart=" + extentTokenSpan.start() + ", eend=" + extentTokenSpan.end() : "")
+ (syntacticHeadTokenPosition >= 0 ? ", headPosition=" + syntacticHeadTokenPosition : "")
+ (headTokenSpan != null ? ", value=\"" + getValue() + "\"" : "")
+ (normalizedName != null ? ", normalizedName=\"" + normalizedName + "\"" : "")
+ ", corefID=" + corefID
+ (typeProbabilities != null ? ", probs=" + probsToString() : "")
+ "]";
}
@Override
public int hashCode() {
int result = mentionType != null ? mentionType.hashCode() : 0;
result = 31 * result + (headTokenSpan != null ? headTokenSpan.hashCode() : 0);
result = 31 * result + (normalizedName != null ? normalizedName.hashCode() : 0);
result = 31 * result + (extentTokenSpan != null ? extentTokenSpan.hashCode() : 0);
return result;
}
static class CompByHead implements Comparator<EntityMention> {
public int compare(EntityMention o1, EntityMention o2) {
if(o1.getHeadTokenStart() < o2.getHeadTokenStart()){
return -1;
} else if(o1.getHeadTokenStart() > o2.getHeadTokenStart()){
return 1;
} else if(o1.getHeadTokenEnd() < o2.getHeadTokenEnd()) {
return -1;
} else if(o1.getHeadTokenEnd() > o2.getHeadTokenEnd()) {
return 1;
} else {
return 0;
}
}
}
public static void sortByHeadSpan(List<EntityMention> mentions) {
Collections.sort(mentions, new CompByHead());
}
private static int MENTION_COUNTER = 0;
/**
* Creates a new unique id for an entity mention
* @return the new id
*/
public static synchronized String makeUniqueId() {
MENTION_COUNTER ++;
return "EntityMention-" + MENTION_COUNTER;
}
}