//
// StanfordCoreNLP -- a suite of NLP tools
// Copyright (c) 2009-2010 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
//
package edu.stanford.nlp.dcoref;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import edu.stanford.nlp.dcoref.Dictionaries.Animacy;
import edu.stanford.nlp.dcoref.Dictionaries.Gender;
import edu.stanford.nlp.dcoref.Dictionaries.MentionType;
import edu.stanford.nlp.dcoref.Dictionaries.Number;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.IntTuple;
/**
 * Output of (deterministic) coref system. Each CorefChain represents a set
 * of mentions in the text which should all correspond to the same actual
 * entity. There is a representative mention, which stores the best
 * mention of an entity, and then there is a List of all mentions
 * that are coreferent with that mention. The mentionMap maps from pairs of
 * a sentence number and a head word index to the Set of CorefMentions that
 * share that head word. The chainID is an arbitrary integer for the chain
 * number.
 *
 * @author Heeyoung Lee
 */
public class CorefChain implements Serializable {

  private final int chainID;

  /** All mentions of this chain, sorted with {@link CorefMentionComparator}. */
  private final List<CorefMention> mentions;

  /** Mentions grouped by (sentence number, head word index). */
  private final Map<IntPair, Set<CorefMention>> mentionMap;

  /** The most representative mention in this cluster */
  private final CorefMention representative;

  /**
   * Two chains are equal when they have the same chain ID, equal mention
   * lists and equal (or both null) representative mentions.
   */
  @Override
  public boolean equals(Object aThat) {
    if (this == aThat) {
      return true;
    }
    if (!(aThat instanceof CorefChain)) {
      return false;
    }
    CorefChain that = (CorefChain) aThat;
    if (chainID != that.chainID) {
      return false;
    }
    if (!mentions.equals(that.mentions)) {
      return false;
    }
    // mentionMap is another view of mentions, so no need to compare
    // that once we've compared mentions
    if (representative == null) {
      return that.representative == null;
    }
    return representative.equals(that.representative);
  }

  /** Hash of the mention list; consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return mentions.hashCode();
  }

  /** get List of CorefMentions (this is the internal list, not a copy) */
  public List<CorefMention> getMentionsInTextualOrder() {
    return mentions;
  }

  /**
   * get CorefMentions by position (sentence number, headIndex).
   * Can be multiple mentions sharing headword.
   *
   * @return the value stored in the mention map for that position, which is
   *         null when the map has no entry for it
   */
  public Set<CorefMention> getMentionsWithSameHead(IntPair position) {
    return mentionMap.get(position);
  }

  /** get CorefMentions by position given as separate sentence number and head index */
  public Set<CorefMention> getMentionsWithSameHead(int sentenceNumber, int headIndex) {
    return getMentionsWithSameHead(new IntPair(sentenceNumber, headIndex));
  }

  /** get the internal map from (sentence number, headIndex) to mentions (not a copy) */
  public Map<IntPair, Set<CorefMention>> getMentionMap() {
    return mentionMap;
  }

  /** Return the most representative mention in the chain.
   *  Proper mention and a mention with more pre-modifiers are preferred.
   */
  public CorefMention getRepresentativeMention() {
    return representative;
  }

  public int getChainID() {
    return chainID;
  }

  /** Mention for coref output.  This is one instance of the entity
   *  referred to by a given CorefChain.
   */
  public static class CorefMention implements Serializable {

    public final MentionType mentionType;
    public final Number number;
    public final Gender gender;
    public final Animacy animacy;

    /**
     * Starting word number, indexed from 1
     */
    public final int startIndex;

    /**
     * One past the end word number, indexed from 1
     */
    public final int endIndex;

    /**
     * Head word of the mention
     */
    public final int headIndex;

    public final int corefClusterID;
    public final int mentionID;

    /**
     * Sentence number in the document containing this mention,
     * indexed from 1.
     */
    public final int sentNum;

    /**
     * Position is a binary tuple of (sentence number, mention number
     * in that sentence). This is used for indexing by mention.
     */
    public final IntTuple position;

    public final String mentionSpan;

    /** This constructor is used to recreate a CorefMention following serialization. */
    public CorefMention(MentionType mentionType,
                        Number number,
                        Gender gender,
                        Animacy animacy,
                        int startIndex,
                        int endIndex,
                        int headIndex,
                        int corefClusterID,
                        int mentionID,
                        int sentNum,
                        IntTuple position,
                        String mentionSpan) {
      this.mentionType = mentionType;
      this.number = number;
      this.gender = gender;
      this.animacy = animacy;
      this.startIndex = startIndex;
      this.endIndex = endIndex;
      this.headIndex = headIndex;
      this.corefClusterID = corefClusterID;
      this.mentionID = mentionID;
      this.sentNum = sentNum;
      this.position = position;
      this.mentionSpan = mentionSpan;
    }

    /**
     * This constructor builds the external CorefMention class from the internal Mention.
     * All word, sentence and position indices are shifted by one relative to the
     * values read from the Mention / pos. As a side effect the head word of
     * {@code m} gets its CorefClusterIdAnnotation set to the mention's cluster id.
     */
    public CorefMention(Mention m, IntTuple pos) {
      mentionType = m.mentionType;
      number = m.number;
      gender = m.gender;
      animacy = m.animacy;
      startIndex = m.startIndex + 1;
      endIndex = m.endIndex + 1;
      headIndex = m.headIndex + 1;
      corefClusterID = m.corefClusterID;
      sentNum = m.sentNum + 1;
      mentionID = m.mentionID;
      mentionSpan = m.spanToString();

      // index starts from 1
      position = new IntTuple(2);
      position.set(0, pos.get(0) + 1);
      position.set(1, pos.get(1) + 1);

      m.headWord.set(CorefCoreAnnotations.CorefClusterIdAnnotation.class, corefClusterID);
    }

    /**
     * Compares every field except {@code mentionSpan}.
     */
    @Override
    public boolean equals(Object aThat) {
      if (this == aThat) {
        return true;
      }
      if (!(aThat instanceof CorefMention)) {
        return false;
      }
      CorefMention that = (CorefMention) aThat;
      // we ignore MentionSpan as it is constructed from the tokens
      // the mention is a span of, so if we know those spans are the
      // same, we should be able to ignore the actual text
      return mentionType == that.mentionType
          && number == that.number
          && gender == that.gender
          && animacy == that.animacy
          && startIndex == that.startIndex
          && endIndex == that.endIndex
          && headIndex == that.headIndex
          && corefClusterID == that.corefClusterID
          && mentionID == that.mentionID
          && sentNum == that.sentNum
          && position.equals(that.position);
    }

    /** Hash of {@code position} only; equal mentions have equal positions. */
    @Override
    public int hashCode() {
      return position.hashCode();
    }

    @Override
    public String toString() {
      return '"' + mentionSpan + "\" in sentence " + sentNum;
    }

    /**
     * Decides whether this mention is a better representative than {@code m}.
     *
     * @param m the current best candidate, may be null
     * @return true if {@code m} is null or this mention is strictly preferred
     */
    private boolean moreRepresentativeThan(CorefMention m) {
      if (m == null) {
        return true;
      }
      if (mentionType != m.mentionType) {
        return (mentionType == MentionType.PROPER)
            || (mentionType == MentionType.NOMINAL && m.mentionType == MentionType.PRONOMINAL);
      }
      // First, check length: more words before the head wins, then the longer span
      int cmp = Integer.compare(headIndex - startIndex, m.headIndex - m.startIndex);
      if (cmp != 0) {
        return cmp > 0;
      }
      cmp = Integer.compare(endIndex - startIndex, m.endIndex - m.startIndex);
      if (cmp != 0) {
        return cmp > 0;
      }
      // Now check relative position: the earlier mention wins
      cmp = Integer.compare(sentNum, m.sentNum);
      if (cmp != 0) {
        return cmp < 0;
      }
      cmp = Integer.compare(headIndex, m.headIndex);
      if (cmp != 0) {
        return cmp < 0;
      }
      // If the start indices are equal too, the mentions tie and this one is not preferred
      return startIndex < m.startIndex;
    }

    private static final long serialVersionUID = 3657691243504173L;

  } // end static class CorefMention

  /**
   * Orders CorefMentions by sentence number, then start index (both ascending),
   * then end index descending, so that of two mentions starting at the same
   * word the longer one comes first.
   */
  protected static class CorefMentionComparator implements Comparator<CorefMention> {
    @Override
    public int compare(CorefMention m1, CorefMention m2) {
      int cmp = Integer.compare(m1.sentNum, m2.sentNum);
      if (cmp != 0) {
        return cmp;
      }
      cmp = Integer.compare(m1.startIndex, m2.startIndex);
      if (cmp != 0) {
        return cmp;
      }
      // arguments swapped on purpose: larger endIndex sorts first
      return Integer.compare(m2.endIndex, m1.endIndex);
    }
  }

  /**
   * Orders internal Mentions by sentence number, then start index (both ascending),
   * then end index descending, so that of two mentions starting at the same
   * word the longer one comes first.
   */
  protected static class MentionComparator implements Comparator<Mention> {
    @Override
    public int compare(Mention m1, Mention m2) {
      int cmp = Integer.compare(m1.sentNum, m2.sentNum);
      if (cmp != 0) {
        return cmp;
      }
      cmp = Integer.compare(m1.startIndex, m2.startIndex);
      if (cmp != 0) {
        return cmp;
      }
      // arguments swapped on purpose: larger endIndex sorts first
      return Integer.compare(m2.endIndex, m1.endIndex);
    }
  }

  /**
   * Delete a mention from this coreference chain.
   * Only the given mention is removed from the mention map; other mentions
   * sharing its head word stay indexed, and the map entry is dropped once no
   * mention with that head is left. The representative mention is not
   * recomputed by this method.
   *
   * @param m The mention to delete.
   */
  public void deleteMention(CorefMention m) {
    this.mentions.remove(m);
    IntPair position = new IntPair(m.sentNum, m.headIndex);
    Set<CorefMention> sameHead = this.mentionMap.get(position);
    if (sameHead != null) {
      sameHead.remove(m);
      // keep the map free of empty buckets so it stays a faithful view of mentions
      if (sameHead.isEmpty()) {
        this.mentionMap.remove(position);
      }
    }
  }

  /**
   * Builds a chain from an internal coref cluster.
   *
   * @param c the cluster whose mentions make up this chain
   * @param positions position tuple for each mention of the cluster; looked up
   *        with {@code positions.get(m)} for every mention
   */
  public CorefChain(CorefCluster c, Map<Mention, IntTuple> positions) {
    chainID = c.clusterID;

    // Collect mentions
    mentions = new ArrayList<>();
    mentionMap = Generics.newHashMap();
    for (Mention m : c.getCorefMentions()) {
      mentions.add(new CorefMention(m, positions.get(m)));
    }
    Collections.sort(mentions, new CorefMentionComparator());

    // Index mentions by head position and find representative mention
    CorefMention represents = null;
    for (CorefMention men : mentions) {
      IntPair position = new IntPair(men.sentNum, men.headIndex);
      Set<CorefMention> sameHead = mentionMap.get(position);
      if (sameHead == null) {
        sameHead = Generics.<CorefMention>newHashSet();
        mentionMap.put(position, sameHead);
      }
      sameHead.add(men);
      if (men.moreRepresentativeThan(represents)) {
        represents = men;
      }
    }
    representative = represents;
  }

  /**
   * Constructor required by CustomAnnotationSerializer.
   * The given map is stored as is (not copied); the mention list is rebuilt
   * from the map's values and sorted.
   */
  public CorefChain(int cid,
                    Map<IntPair, Set<CorefMention>> mentionMap,
                    CorefMention representative) {
    this.chainID = cid;
    this.representative = representative;
    this.mentionMap = mentionMap;
    this.mentions = new ArrayList<>();
    for (Set<CorefMention> ms : mentionMap.values()) {
      this.mentions.addAll(ms);
    }
    Collections.sort(mentions, new CorefMentionComparator());
  }

  @Override
  public String toString() {
    return "CHAIN" + this.chainID + '-' + mentions;
  }

  private static final long serialVersionUID = 3657691243506528L;

}