EntityMention.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.ie.machinereading.structure;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

/**
 * Each entity mention is described by a type (possibly subtype) and a span of text
 *
 * @author Andrey Gusev
 * @author Mihai
 */
public class EntityMention extends ExtractionObject {

  private static final long serialVersionUID = -2745903102654191527L;

  /** Mention type, if available, e.g., nominal */
  private final String mentionType;
  private String corefID = "-1";

  /**
   * Offsets the head span, e.g., "George Bush" in the extent "the president George Bush"
   * The offsets are relative to the sentence containing this mention
   */
  private Span headTokenSpan;

  /**
   * Position of the syntactic head word of this mention, e.g., "Bush" for the head span "George Bush"
   * The offset is relative the sentence containing this mention
   * Note: use headTokenSpan when sequence tagging entity mentions not this.
   *       This is meant to be used only for event/relation feature extraction!
   */
  private int syntacticHeadTokenPosition;

  private String normalizedName;

  public EntityMention(String objectId,
      CoreMap sentence,
      Span extentSpan,
      Span headSpan,
      String type,
      String subtype,
      String mentionType) {
    super(objectId, sentence, extentSpan, type, subtype);
    this.mentionType = (mentionType != null ? mentionType.intern() : null);
    this.headTokenSpan = headSpan;
    this.syntacticHeadTokenPosition = -1;
    this.normalizedName = null;
  }

  public String getCorefID(){
    return corefID;
  }

  public void setCorefID(String id) {
    this.corefID = id;
  }
  public String getMentionType() { return mentionType; }

  public Span getHead() { return headTokenSpan; }

  public int getHeadTokenStart() {
    return headTokenSpan.start();
  }

  public int getHeadTokenEnd() {
    return headTokenSpan.end();
  }

  public void setHeadTokenSpan(Span s) {
    headTokenSpan = s;
  }

  public void setHeadTokenPosition(int i) {
    this.syntacticHeadTokenPosition = i;
  }

  public int getSyntacticHeadTokenPosition() {
    return this.syntacticHeadTokenPosition;
  }

  public CoreLabel getSyntacticHeadToken() {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    return tokens.get(syntacticHeadTokenPosition);
  }

  public Tree getSyntacticHeadTree() {
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    return tree.getLeaves().get(syntacticHeadTokenPosition);
  }

  public String getNormalizedName() { return normalizedName; }
  public void setNormalizedName(String n) { normalizedName = n; }

  /*
  @Override
  public boolean equals(Object other) {
    if(! (other instanceof EntityMention)) return false;
    ExtractionObject o = (ExtractionObject) other;
    if(o.objectId.equals(objectId) && o.sentence == sentence) return true;
    return false;
  }
   */

  @Override
  public boolean equals(Object other) {
    if(! (other instanceof EntityMention)) return false;
    EntityMention otherEnt = (EntityMention) other;
    return equals(otherEnt, true);
  }

  public boolean headIncludes(EntityMention otherEnt, boolean useSubType) {
    return otherEnt.getSyntacticHeadTokenPosition() >= getHeadTokenStart() &&
            otherEnt.getSyntacticHeadTokenPosition() < getHeadTokenEnd() &&
            ((type != null && otherEnt.type != null && type.equals(otherEnt.type)) || (type == null && otherEnt.type == null)) &&
            ( ! useSubType || ((subType != null && otherEnt.subType != null && subType.equals(otherEnt.subType)) || (subType == null && otherEnt.subType == null)));
  }

  public boolean equals(EntityMention otherEnt, boolean useSubType) {
    //
    // two mentions are equal if they are over the same sentence,
    // have the same head span, the same type/subtype, and the same text.
    // We need this for scoring NER, and in various places in KBP
    //
    if(sentence.get(CoreAnnotations.TextAnnotation.class).equals(otherEnt.sentence.get(CoreAnnotations.TextAnnotation.class)) && textEquals(otherEnt) && labelEquals(otherEnt, useSubType)){
      return true;
    }
    /*
  	if(((headTokenSpan != null && headTokenSpan.equals(otherEnt.headTokenSpan)) ||
        (extentTokenSpan != null && extentTokenSpan.equals(otherEnt.extentTokenSpan))) &&
        ((type != null && otherEnt.type != null && type.equals(otherEnt.type)) || (type == null && otherEnt.type == null)) &&
        (! useSubType || ((subType != null && otherEnt.subType != null && subType.equals(otherEnt.subType)) || (subType == null && otherEnt.subType == null))) &&
        AnnotationUtils.getTextContent(sentence, headTokenSpan).equals(AnnotationUtils.getTextContent(otherEnt.getSentence(), otherEnt.headTokenSpan))){
      return true;
    }
     */
    return false;
  }

  /**
   * Compares the labels of the two mentions
   * @param otherEnt
   * @param useSubType
   */
  public boolean labelEquals(EntityMention otherEnt, boolean useSubType) {
    if(((type != null && otherEnt.type != null && type.equals(otherEnt.type)) || (type == null && otherEnt.type == null)) &&
        (! useSubType || ((subType != null && otherEnt.subType != null && subType.equals(otherEnt.subType)) || (subType == null && otherEnt.subType == null)))){
      return true;
    }
    return false;
  }

  /**
   * Compares the text spans of the two entity mentions.
   *
   * @param otherEnt
   */
  public boolean textEquals(EntityMention otherEnt) {
    //
    // we attempt three comparisons:
    // a) if syntactic heads are defined we consider two texts similar if they have the same syntactic head
    //    (this is necessary because in NFL we compare entities with different spans but same heads, e.g. "49ers" vs "San Francisco 49ers"
    // b) if head spans are defined we consider two texts similar if they have the same head span
    // c) if extent spans are defined we consider two texts similar if they have the same extent span
    //
    if(syntacticHeadTokenPosition != -1 && otherEnt.syntacticHeadTokenPosition != -1){
      if(syntacticHeadTokenPosition == otherEnt.syntacticHeadTokenPosition) return true;
      return false;
    }

    if(headTokenSpan != null && otherEnt.headTokenSpan != null){
      if(headTokenSpan.equals(otherEnt.headTokenSpan)) return true;
      return false;
    }

    if(extentTokenSpan != null && otherEnt.extentTokenSpan != null){
      if(extentTokenSpan.equals(otherEnt.extentTokenSpan)) return true;
      return false;
    }

    if (!this.getExtentString().equals(otherEnt.getExtentString())) {
      return false;
    }

    return false;
  }

  /**
   * Get the text value of this entity.
   * The headTokenSpan MUST be set before calling this method!
   */
  public String getValue() {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    // int lastEnd = -1;
    StringBuilder sb = new StringBuilder();
    for (int i = headTokenSpan.start(); i < headTokenSpan.end(); i ++){
      CoreLabel token = tokens.get(i);

      // we are not guaranteed to have CharacterOffsets so we can't use them...
      /*
    	Integer start = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    	Integer end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

    	if (start != null && end != null) {
    	  if (lastEnd != -1 && !start.equals(lastEnd)) {
    	    sb.append(StringUtils.repeat(" ", start - lastEnd));
    	    lastEnd = end;
    	  }
    	} else {
    	  if (lastEnd != -1) sb.append(" ");
    	  lastEnd = 0;
    	}
       */
      if(i > headTokenSpan.start()) sb.append(" ");

      sb.append(token.word());

    }

    return sb.toString();
  }

  
  @Override
  public String toString() {
    return "EntityMention [type=" + type 
    + (subType != null ? ", subType=" + subType : "")
    + (mentionType != null ? ", mentionType=" + mentionType : "")
    + (objectId != null ? ", objectId=" + objectId : "") 
    + (headTokenSpan != null ? ", hstart=" + headTokenSpan.start() + ", hend=" + headTokenSpan.end() : "")
    + (extentTokenSpan != null ? ", estart=" + extentTokenSpan.start() + ", eend=" + extentTokenSpan.end() : "")
    + (syntacticHeadTokenPosition >= 0 ? ", headPosition=" + syntacticHeadTokenPosition : "")
    + (headTokenSpan != null ? ", value=\"" + getValue() + "\"" : "") 
    + (normalizedName != null ? ", normalizedName=\"" + normalizedName + "\"" : "")
    + ", corefID=" + corefID
    + (typeProbabilities != null ? ", probs=" + probsToString() : "")
    + "]";
  }

  @Override
  public int hashCode() {
    int result = mentionType != null ? mentionType.hashCode() : 0;
    result = 31 * result + (headTokenSpan != null ? headTokenSpan.hashCode() : 0);
    result = 31 * result + (normalizedName != null ? normalizedName.hashCode() : 0);
    result = 31 * result + (extentTokenSpan != null ? extentTokenSpan.hashCode() : 0);
    return result;
  }

  static class CompByHead implements Comparator<EntityMention> {
    public int compare(EntityMention o1, EntityMention o2) {
      if(o1.getHeadTokenStart() < o2.getHeadTokenStart()){
        return -1;
      } else if(o1.getHeadTokenStart() > o2.getHeadTokenStart()){
        return 1;
      } else if(o1.getHeadTokenEnd() < o2.getHeadTokenEnd()) {
        return -1;
      } else if(o1.getHeadTokenEnd() > o2.getHeadTokenEnd()) {
        return 1;
      } else {
        return 0;
      }
    }
  }

  public static void sortByHeadSpan(List<EntityMention> mentions) {
    Collections.sort(mentions, new CompByHead());
  }

  private static int MENTION_COUNTER = 0;

  /**
   * Creates a new unique id for an entity mention
   * @return the new id
   */
  public static synchronized String makeUniqueId() {
    MENTION_COUNTER ++;
    return "EntityMention-" + MENTION_COUNTER;
  }
}