//
// StanfordCoreNLP -- a suite of NLP tools
// Copyright (c) 2009-2010 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
//
package edu.stanford.nlp.coref.data;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import edu.stanford.nlp.coref.data.Dictionaries.Animacy;
import edu.stanford.nlp.coref.data.Dictionaries.Gender;
import edu.stanford.nlp.coref.data.Dictionaries.MentionType;
import edu.stanford.nlp.coref.data.Dictionaries.Number;
import edu.stanford.nlp.coref.data.Dictionaries.Person;
import edu.stanford.nlp.classify.LogisticClassifier;
import edu.stanford.nlp.ling.AbstractCoreLabel;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.stats.IntCounter;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.UniversalEnglishGrammaticalRelations;
import edu.stanford.nlp.util.*;
/**
* One mention for the SieveCoreferenceSystem.
*
* @author Jenny Finkel, Karthik Raghunathan, Heeyoung Lee, Marta Recasens
*/
public class Mention implements CoreAnnotation<Mention>, Serializable {
/** Serialization version identifier for this {@link Serializable} class. */
private static final long serialVersionUID = -7524485803945717057L;
/** Creates an empty mention; every field keeps its declared default until a caller sets it. */
public Mention() { }
/**
 * Creates a mention from its identifier, its token range, the tokens of its sentence and the
 * two dependency graphs of that sentence. Each argument is stored in the field of the same name.
 *
 * @param mentionID identifier stored in {@link #mentionID}
 * @param startIndex value stored in {@link #startIndex}
 * @param endIndex value stored in {@link #endIndex}
 * @param sentenceWords tokens of the containing sentence (kept by reference)
 * @param basicDependency basic dependency graph (kept by reference)
 * @param enhancedDependency enhanced dependency graph (kept by reference)
 */
public Mention(int mentionID, int startIndex, int endIndex, List<CoreLabel> sentenceWords,
               SemanticGraph basicDependency, SemanticGraph enhancedDependency) {
  // Sentence-level context first, then the mention's own coordinates.
  this.sentenceWords = sentenceWords;
  this.basicDependency = basicDependency;
  this.enhancedDependency = enhancedDependency;
  this.mentionID = mentionID;
  this.startIndex = startIndex;
  this.endIndex = endIndex;
}
/**
 * Same as the six-argument constructor, and additionally records the tokens of the mention itself.
 *
 * @param mentionSpan tokens of the mention, stored in {@link #originalSpan} (kept by reference)
 */
public Mention(int mentionID, int startIndex, int endIndex, List<CoreLabel> sentenceWords,
               SemanticGraph basicDependency, SemanticGraph enhancedDependency,
               List<CoreLabel> mentionSpan) {
  this(mentionID, startIndex, endIndex, sentenceWords, basicDependency, enhancedDependency);
  this.originalSpan = mentionSpan;
}
/**
 * Same as the seven-argument constructor, and additionally records the parse subtree of the mention.
 *
 * @param mentionTree subtree for this mention, stored in {@link #mentionSubTree}
 */
public Mention(int mentionID, int startIndex, int endIndex, List<CoreLabel> sentenceWords,
               SemanticGraph basicDependency, SemanticGraph enhancedDependency,
               List<CoreLabel> mentionSpan, Tree mentionTree) {
  this(mentionID, startIndex, endIndex, sentenceWords, basicDependency, enhancedDependency,
       mentionSpan);
  this.mentionSubTree = mentionTree;
}
// ---- Attributes assigned by process() through setType/setNumber/setGender/setAnimacy/setPerson ----
public MentionType mentionType;
public Number number;
public Gender gender;
public Animacy animacy;
public Person person;
/** Lowercased text of the head word; assigned by setHeadString(). */
public String headString;
/** NER label taken from the head word, or "O"; assigned by setNERString(). */
public String nerString;
// ---- Position of the mention; startIndex and headIndex are used as positions in sentenceWords ----
public int startIndex;
public int endIndex;
public int headIndex;
// ---- Identifiers; the ones initialised to -1 stay -1 until a caller assigns them ----
public int mentionID = -1;
public int originalRef = -1;
/** Node for the head word in basicDependency; looked up by setHeadString() and may be null. */
public IndexedWord headIndexedWord;
public int goldCorefClusterID = -1;
public int corefClusterID = -1;
public int mentionNum;
public int sentNum = -1;
public int utter = -1;
public int paragraph = -1;
// ---- Grammatical role flags; all four are reset and recomputed by setDiscourse() ----
public boolean isSubject;
public boolean isDirectObject;
public boolean isIndirectObject;
public boolean isPrepositionObject;
/** Verb node returned by findDependentVerb(); assigned by setDiscourse() and may be null. */
public IndexedWord dependingVerb;
public boolean hasTwin = false;
public boolean generic = false; // generic pronoun or generic noun (bare plurals)
/** Set to true by setSingleton() when the predictor's probability is below its threshold. */
public boolean isSingleton;
/** Tokens of the containing sentence, as passed to the constructor. */
public List<CoreLabel> sentenceWords;
/** Tokens of the mention itself, as passed to the constructor. */
public List<CoreLabel> originalSpan;
public Tree mentionSubTree;
public Tree contextParseTree;
public CoreLabel headWord;
public SemanticGraph basicDependency;
public SemanticGraph enhancedDependency;
public Set<String> dependents = Generics.newHashSet();
/** Search terms produced in setSemantics() (from preprocessSearchTerm() or a state-abbreviation lookup). */
public List<String> preprocessedTerms;
/** Result of the reflective "findSynset" call made in setSemantics(). */
public Object synsets;
// The five sets below start out null and are created lazily by the matching add* methods.
/** Set of other mentions in the same sentence that are syntactic appositions to this */
public Set<Mention> appositions = null;
public Set<Mention> predicateNominatives = null;
public Set<Mention> relativePronouns = null;
/** Set of other mentions in the same sentence that belong to this list */
public Set<Mention> listMembers = null;
/** Set of other mentions in the same sentence that I am a member of */
public Set<Mention> belongToLists = null;
// Mention is identified as being this speaker....
public SpeakerInfo speakerInfo;
// Lazily computed caches for spanToString() and lowercaseNormalizedSpanString(); not serialized.
transient private String spanString = null;
transient private String lowercaseNormalizedSpanString = null;
public IntCounter<Integer> antecedentOrdering = new IntCounter<>();
/** Returns {@code Mention.class}, the value type this {@link CoreAnnotation} implementation reports. */
@Override
public Class<Mention> getType() {
return Mention.class;
}
/** @return true when this mention's type is {@code MentionType.PRONOMINAL} */
public boolean isPronominal() {
  return MentionType.PRONOMINAL == this.mentionType;
}
/** Returns the mention's surface text, exactly as produced by {@link #spanToString()}. */
@Override
public String toString() {
  return this.spanToString();
}
/**
 * Returns the text of every token in {@code originalSpan}, joined by single spaces.
 * The result is computed once and cached in {@code spanString}; the method is not synchronized.
 *
 * @return the space-joined token text of this mention
 */
public String spanToString() {
  if (spanString != null) {
    return spanString;
  }
  StringBuilder joined = new StringBuilder();
  String separator = "";
  for (CoreLabel token : originalSpan) {
    joined.append(separator).append(token.get(CoreAnnotations.TextAnnotation.class));
    separator = " ";
  }
  spanString = joined.toString();
  return spanString;
}
/**
 * Returns {@link #spanToString()} converted to lowercase. The value is computed on first use
 * and cached; the method is not synchronized.
 *
 * @return the lowercased span text
 */
public String lowercaseNormalizedSpanString() {
  if (lowercaseNormalizedSpanString != null) {
    return lowercaseNormalizedSpanString;
  }
  // Normalization here always means lowercasing.
  lowercaseNormalizedSpanString = spanToString().toLowerCase();
  return lowercaseNormalizedSpanString;
}
/**
 * Retrieves the part of the span that carries this mention's NER label, growing outwards
 * from the head token in both directions while neighbouring tokens have the same label.
 *
 * @return a sub-list view of {@code originalSpan}, or null when {@code nerString} is null or "O"
 */
public List<CoreLabel> nerTokens() {
  if (nerString == null || "O".equals(nerString)) {
    return null;
  }
  final int headOffset = headIndex - startIndex;
  // Extend to the left of the head.
  int from = headOffset;
  while (from > 0 && nerString.equals(originalSpan.get(from - 1).ner())) {
    from--;
  }
  // Extend to the right of the head.
  int to = headOffset + 1;
  while (to < originalSpan.size() && nerString.equals(originalSpan.get(to).ner())) {
    to++;
  }
  return originalSpan.subList(from, to);
}
/**
 * Returns the tokens found by {@link #nerTokens()} joined with single spaces.
 *
 * @return the joined named-entity text, or null when {@code nerTokens()} returns null
 */
public String nerName() {
  List<CoreLabel> entityTokens = nerTokens();
  if (entityTokens == null) {
    return null;
  }
  return StringUtils.joinWords(entityTokens, " ");
}
/**
 * Sets the attributes of this mention: head string, mention type, NER label, number, gender,
 * animacy, person and discourse role, in that order. Semantic information is added last,
 * and only when {@code semantics} is non-null.
 *
 * @param dict dictionaries consulted by the individual attribute setters
 * @param semantics optional semantics resources; null skips the semantics step
 * @throws Exception propagated from the semantics step
 */
public void process(Dictionaries dict, Semantics semantics) throws Exception {
  setHeadString();
  setType(dict);
  setNERString();

  // The gender lookup works on the lowercased words up to and including the head.
  List<String> wordsUpToHead = getMentionString();
  setNumber(dict);
  setGender(dict, getGender(dict, wordsUpToHead));

  setAnimacy(dict);
  setPerson(dict);
  setDiscourse();

  if (semantics != null) {
    setSemantics(dict, semantics);
  }
}
/**
 * Runs {@link #process(Dictionaries, Semantics)} and then, when a predictor is supplied,
 * uses it to decide whether this mention should be flagged as a singleton.
 *
 * @param singletonPredictor optional classifier; null skips the singleton step
 * @throws Exception propagated from the two-argument {@code process}
 */
public void process(Dictionaries dict, Semantics semantics,
                    LogisticClassifier<String, String> singletonPredictor) throws Exception {
  process(dict, semantics);
  if (singletonPredictor == null) {
    return;
  }
  setSingleton(singletonPredictor, dict);
}
/**
 * Flags this mention as a singleton when the predictor gives label "1" a probability
 * below 0.2 for this mention's singleton features. The flag is never cleared here.
 */
private void setSingleton(LogisticClassifier<String, String> predictor, Dictionaries dict) {
  BasicDatum<String, String> datum = new BasicDatum<>(getSingletonFeatures(dict), "1");
  double corefProbability = predictor.probabilityOf(datum);
  if (corefProbability < 0.2) {
    this.isSingleton = true;
  }
}
/**
 * Builds the feature list used by the singleton predictor (a logistic classifier) to decide
 * whether the mention belongs to a singleton entity. The order of the features is fixed:
 * mention type, NER label, animacy, person code, number, position, relation, quantification,
 * modifiers, negation, modal, report embedding, coordination.
 *
 * @param dict dictionaries passed on to the individual feature extractors
 * @return the features as strings, in the order listed above
 */
public ArrayList<String> getSingletonFeatures(Dictionaries dict) {
  // Person code: 1 = I/WE, 2 = YOU, 0 = UNKNOWN, 3 = anything else.
  final int personCode;
  if (person.equals(Person.I) || person.equals(Person.WE)) {
    personCode = 1;
  } else if (person.equals(Person.YOU)) {
    personCode = 2;
  } else if (person.equals(Person.UNKNOWN)) {
    personCode = 0;
  } else {
    personCode = 3;
  }

  ArrayList<String> features = new ArrayList<>();
  features.add(mentionType.toString());
  features.add(nerString);
  features.add(animacy.toString());
  features.add(String.valueOf(personCode));
  features.add(number.toString());
  features.add(getPosition());
  features.add(getRelation());
  features.add(getQuantification(dict));
  features.add(String.valueOf(getModifiers(dict)));
  features.add(String.valueOf(getNegation(dict)));
  features.add(String.valueOf(getModal(dict)));
  features.add(String.valueOf(getReportEmbedding(dict)));
  features.add(String.valueOf(getCoordination()));
  return features;
}
/**
 * Collects the lowercased text of the span's tokens from the first token up to and including
 * the head word (matched by identity); tokens after the head are left out.
 */
private List<String> getMentionString() {
  List<String> lowered = new ArrayList<>();
  for (int i = 0; i < this.originalSpan.size(); i++) {
    CoreLabel token = this.originalSpan.get(i);
    lowered.add(token.get(CoreAnnotations.TextAnnotation.class).toLowerCase());
    if (token == this.headWord) {
      // Everything after the head word is ignored.
      break;
    }
  }
  return lowered;
}
/**
 * Looks the mention up in {@code dict.genderNumber}.
 *
 * For a multi-word mention whose head starts with an uppercase letter and whose NER label starts
 * with "PER", progressively shorter word sequences ending at the head are tried first, then
 * a "name !" pattern, then the (assumed) first name alone. In every case the last word on its
 * own is the final fallback.
 *
 * @param mStr lowercased words of the mention up to and including the head
 * @return the dictionary entry found, or null when nothing matches
 */
private Gender getGender(Dictionaries dict, List<String> mStr) {
  final int len = mStr.size();
  final char firstLetter = headWord.get(CoreAnnotations.TextAnnotation.class).charAt(0);

  if (len > 1 && Character.isUpperCase(firstLetter) && nerString.startsWith("PER")) {
    int firstNameIdx = len - 2;
    final String secondToLast = mStr.get(firstNameIdx);
    // Skip over something that looks like a middle initial ("j" or "j.").
    if (firstNameIdx > 1
        && (secondToLast.length() == 1 || (secondToLast.length() == 2 && secondToLast.endsWith(".")))) {
      firstNameIdx--;
    }

    for (int i = 0; i <= firstNameIdx; i++) {
      List<String> suffixWords = mStr.subList(i, len);
      if (dict.genderNumber.containsKey(suffixWords)) {
        return dict.genderNumber.get(suffixWords);
      }
    }

    // find converted string with ! (e.g., "dr. martin luther king jr. boulevard" -> "dr. !")
    List<String> convertedStr = new ArrayList<>(2);
    convertedStr.add(mStr.get(firstNameIdx));
    convertedStr.add("!");
    if (dict.genderNumber.containsKey(convertedStr)) {
      return dict.genderNumber.get(convertedStr);
    }

    List<String> firstNameOnly = mStr.subList(firstNameIdx, firstNameIdx + 1);
    if (dict.genderNumber.containsKey(firstNameOnly)) {
      return dict.genderNumber.get(firstNameOnly);
    }
  }

  if (mStr.size() > 0) {
    List<String> lastWordOnly = mStr.subList(len - 1, len);
    if (dict.genderNumber.containsKey(lastWordOnly)) {
      return dict.genderNumber.get(lastWordOnly);
    }
  }
  return null;
}
/**
 * Records the verb this mention depends on (as found by findDependentVerb) and derives the
 * four grammatical-role flags from the dependency relation name. When no relation is found,
 * all four flags are false.
 */
private void setDiscourse() {
  Pair<IndexedWord, String> verbAndRelation = findDependentVerb(this);
  final String relation = verbAndRelation.second();
  dependingVerb = verbAndRelation.first();

  // The relation names below are mutually exclusive, so each flag can be computed on its own.
  isSubject = "nsubj".equals(relation) || "csubj".equals(relation);
  isDirectObject = "dobj".equals(relation) || "nsubjpass".equals(relation);
  isIndirectObject = "iobj".equals(relation);
  isPrepositionObject = relation != null
      && relation.startsWith("nmod")
      && !(relation.equals("nmod:npmod")
           || relation.equals("nmod:tmod")
           || relation.equals("nmod:poss")
           || relation.equals("nmod:agent"));
}
/**
 * Determines the grammatical person. Only pronominal mentions get a value other than
 * {@code Person.UNKNOWN}; for those, the lowercased span is looked up in the first-, second-
 * and third-person pronoun dictionaries and refined with number, gender and animacy.
 */
private void setPerson(Dictionaries dict) {
  if (!this.isPronominal()) {
    // Person is only meaningful for pronouns.
    person = Person.UNKNOWN;
    return;
  }

  final String pronoun = this.spanToString().toLowerCase();
  Person resolved = Person.UNKNOWN;

  if (dict.firstPersonPronouns.contains(pronoun)) {
    if (number == Number.SINGULAR) {
      resolved = Person.I;
    } else if (number == Number.PLURAL) {
      resolved = Person.WE;
    }
  } else if (dict.secondPersonPronouns.contains(pronoun)) {
    resolved = Person.YOU;
  } else if (dict.thirdPersonPronouns.contains(pronoun)) {
    final boolean singular = (number == Number.SINGULAR);
    if (gender == Gender.MALE && singular) {
      resolved = Person.HE;
    } else if (gender == Gender.FEMALE && singular) {
      resolved = Person.SHE;
    } else if ((gender == Gender.NEUTRAL || animacy == Animacy.INANIMATE) && singular) {
      resolved = Person.IT;
    } else if (number == Number.PLURAL) {
      resolved = Person.THEY;
    }
  }

  person = resolved;
}
/**
 * Computes the search terms for this mention and looks up their synsets.
 *
 * The terms come from {@link #preprocessSearchTerm()}, unless the span text is a key of
 * {@code dict.statesAbbreviation}, in which case the mapped value is the only term.
 * The synset lookup is done reflectively through a {@code findSynset(List)} method declared on
 * the runtime class of {@code semantics.wordnet}; its result is stored in {@code synsets}.
 *
 * @param dict dictionaries providing the state-abbreviation map
 * @param semantics holder of the {@code wordnet} object the lookup is invoked on
 * @throws Exception if the reflective lookup or invocation fails
 */
private void setSemantics(Dictionaries dict, Semantics semantics) throws Exception {
  preprocessedTerms = this.preprocessSearchTerm();

  final String span = this.spanToString();
  if (dict.statesAbbreviation.containsKey(span)) { // states abbreviations
    preprocessedTerms = new ArrayList<>();
    preprocessedTerms.add(dict.statesAbbreviation.get(span));
  }

  Method meth = semantics.wordnet.getClass().getDeclaredMethod("findSynset", List.class);
  // Wrap the list in an Object[] so it is passed as the single argument, not spread as varargs.
  synsets = meth.invoke(semantics.wordnet, new Object[]{preprocessedTerms});
  // A trailing "if (isPronominal()) return;" used to follow here; it had no effect and was removed.
}
/**
 * Checks list membership: true if the other mention is a LIST, this mention is not itself
 * a LIST (nested lists are not handled), the two are not equal, and this mention lies inside
 * the other one according to {@link #includedIn(Mention)}.
 */
public boolean isListMemberOf(Mention m) {
  if (this.equals(m)) {
    return false;
  }
  if (m.mentionType != MentionType.LIST) {
    return false;
  }
  if (this.mentionType == MentionType.LIST) {
    // Don't handle nested lists
    return false;
  }
  return this.includedIn(m);
}
/** Adds {@code m} to {@code listMembers}, creating the set on first use. */
public void addListMember(Mention m) {
  if (listMembers == null) {
    listMembers = Generics.newHashSet();
  }
  listMembers.add(m);
}
/** Adds {@code m} to {@code belongToLists}, creating the set on first use. */
public void addBelongsToList(Mention m) {
  if (belongToLists == null) {
    belongToLists = Generics.newHashSet();
  }
  belongToLists.add(m);
}
/**
 * @return true when both mentions have a non-null {@code belongToLists} set and the two sets
 *         share at least one element
 */
public boolean isMemberOfSameList(Mention m) {
  final Set<Mention> mine = belongToLists;
  final Set<Mention> theirs = m.belongToLists;
  return mine != null && theirs != null && CollectionUtils.containsAny(mine, theirs);
}
/**
 * Heuristically decides whether this mention looks like a conjunction or enumeration of things,
 * by looking for "and"/"or" (POS tag CC) and commas that are not part of a named entity.
 *
 * Two passes are made:
 * <ol>
 *   <li>If the yield of {@code mentionSubTree} equals the span text, the subtree's direct children
 *       are inspected: a CC child outside a named entity makes this a list immediately, and
 *       commas outside named entities are counted.</li>
 *   <li>If at most two such commas were seen, the span tokens are scanned: any TO/IN/VB* tag
 *       rejects the mention; an "and"/"or" that is not the first token and is outside a named
 *       entity accepts it.</li>
 * </ol>
 * Otherwise the mention is list-like only when more than two commas were counted.
 *
 * Requires {@code mentionSubTree} to be non-null.
 */
private boolean isListLike() {
  int commas = 0;
  String mentionSpanString = spanToString();
  String subTreeSpanString = StringUtils.joinWords(mentionSubTree.yieldWords(), " ");
  if (subTreeSpanString.equals(mentionSpanString)) {
    // subtree represents this mention well....
    for (Tree child : mentionSubTree.getChildrenAsList()) {
      String label = child.value();
      String ner = null;
      // "CC" and "," are POS tags, so they label preterminals rather than leaves. Reading the
      // NER tag only for leaves meant it was never available for exactly the nodes tested
      // below, so a conjunction inside a named entity was always treated as a list marker.
      // Leaves are still handled as before.
      if (child.isLeaf() || child.isPreTerminal()) {
        ner = ((CoreLabel) child.getLeaves().get(0).label()).ner();
      }
      final boolean outsideNamedEntity = (ner == null || "O".equals(ner));
      if ("CC".equals(label)) {
        if (outsideNamedEntity) {
          return true;
        }
      } else if (",".equals(label)) {
        if (outsideNamedEntity) {
          commas++;
        }
      }
    }
  }

  if (commas <= 2) {
    // look at the string for and/or
    boolean first = true;
    for (CoreLabel t : originalSpan) {
      String tag = t.tag();
      String ner = t.ner();
      String w = t.word();
      if (tag.equals("TO") || tag.equals("IN") || tag.startsWith("VB")) {
        // prepositions and verbs are too hard for us
        return false;
      }
      if (!first) {
        if (w.equalsIgnoreCase("and") || w.equalsIgnoreCase("or")) {
          // Only a conjunction outside a named entity counts.
          if (ner == null || "O".equals(ner)) {
            return true;
          }
        }
      }
      first = false;
    }
  }

  return (commas > 2);
}
/**
 * Dependency-based list test: true when the head node has a CONJUNCT child in the basic
 * dependency graph whose position ({@code index() - 1}, presumably converting a 1-based node
 * index to a token offset) lies strictly between {@code startIndex} and {@code endIndex}.
 * Returns false when there is no head node, which probably means a parser error.
 */
private boolean isListLikeByDependency() {
  if (this.headIndexedWord == null) {
    return false;
  }
  IndexedWord conjunct = this.basicDependency.getChildWithReln(
      this.headIndexedWord, UniversalEnglishGrammaticalRelations.CONJUNCT);
  if (conjunct == null) {
    return false;
  }
  return this.startIndex < conjunct.index() - 1 && conjunct.index() - 1 < this.endIndex;
}
/**
 * Assigns {@code mentionType}.
 *
 * A list-like mention (tested on the parse subtree when there is one, otherwise on the
 * dependency graph) is LIST. Otherwise, when the head word carries an entity-type annotation
 * (ACE gold mention type), "PRO" maps to PRONOMINAL, "NAM" to PROPER and anything else to
 * NOMINAL. Otherwise (MUC) the type is derived from the head's POS tag, its NER tag and the
 * pronoun dictionaries.
 */
private void setType(Dictionaries dict) {
  final boolean listLike =
      (this.mentionSubTree != null) ? isListLike() : isListLikeByDependency();
  if (listLike) {
    mentionType = MentionType.LIST;
    return;
  }

  if (headWord.containsKey(CoreAnnotations.EntityTypeAnnotation.class)) { // ACE gold mention type
    final String aceType = headWord.get(CoreAnnotations.EntityTypeAnnotation.class);
    if (aceType.equals("PRO")) {
      mentionType = MentionType.PRONOMINAL;
    } else if (aceType.equals("NAM")) {
      mentionType = MentionType.PROPER;
    } else {
      mentionType = MentionType.NOMINAL;
    }
    return;
  }

  // MUC
  if (!headWord.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class)) { // temporary fix
    mentionType = MentionType.NOMINAL;
    return;
  }

  final String nerTag = headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class);
  final boolean pronounTag =
      headWord.tag().startsWith("PRP") || headWord.tag().startsWith("PN");
  if (pronounTag
      || (originalSpan.size() == 1 && nerTag.equals("O")
          && (dict.allPronouns.contains(headString) || dict.relativePronouns.contains(headString)))) {
    mentionType = MentionType.PRONOMINAL;
  } else if (!nerTag.equals("O")
      || headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
    mentionType = MentionType.PROPER;
  } else {
    mentionType = MentionType.NOMINAL;
  }
}
/**
 * Assigns {@code gender}.
 *
 * The starting value is {@code genderNumberResult} when it is non-null and the mention is not
 * plural, otherwise UNKNOWN. Pronouns are then overridden from the male/female pronoun lists.
 * For other mentions that are still UNKNOWN, person names ("PERSON"/"PER") are resolved from
 * the first NER token found in the male or female word lists, and everything else from the
 * head string (male, female or neutral word lists; Bergsma or user provided list).
 *
 * @param genderNumberResult result of the gender/number dictionary lookup; may be null
 */
private void setGender(Dictionaries dict, Gender genderNumberResult) {
  gender = (genderNumberResult != null && this.number != Number.PLURAL)
      ? genderNumberResult
      : Gender.UNKNOWN;

  if (mentionType == MentionType.PRONOMINAL) {
    if (dict.malePronouns.contains(headString)) {
      gender = Gender.MALE;
    } else if (dict.femalePronouns.contains(headString)) {
      gender = Gender.FEMALE;
    }
    return;
  }

  if (gender != Gender.UNKNOWN) {
    return;
  }

  if ("PERSON".equals(nerString) || "PER".equals(nerString)) {
    // Try each token of the named entity, starting with the first name, until one is known.
    for (CoreLabel token : nerTokens()) {
      final String name = token.word().toLowerCase();
      if (dict.maleWords.contains(name)) {
        gender = Gender.MALE;
        break;
      }
      if (dict.femaleWords.contains(name)) {
        gender = Gender.FEMALE;
        break;
      }
    }
    return;
  }

  if (dict.maleWords.contains(headString)) {
    gender = Gender.MALE;
  } else if (dict.femaleWords.contains(headString)) {
    gender = Gender.FEMALE;
  } else if (dict.neutralWords.contains(headString)) {
    gender = Gender.NEUTRAL;
  }
}
/**
 * Assigns {@code number}.
 *
 * Pronouns are resolved from the plural/singular pronoun lists only. LIST mentions are plural.
 * Named entities that are not NOMINAL are singular, except organizations, which stay UNKNOWN
 * because they can be either. Everything else is decided from the head's POS tag
 * (N...S is plural, other N... is singular). A non-pronominal mention that is still UNKNOWN is
 * finally looked up in the singular/plural word lists.
 */
protected void setNumber(Dictionaries dict) {
  final boolean pronominal = (mentionType == MentionType.PRONOMINAL);

  if (pronominal) {
    if (dict.pluralPronouns.contains(headString)) {
      number = Number.PLURAL;
    } else if (dict.singularPronouns.contains(headString)) {
      number = Number.SINGULAR;
    } else {
      number = Number.UNKNOWN;
    }
  } else if (mentionType == MentionType.LIST) {
    number = Number.PLURAL;
  } else if (!nerString.equals("O") && mentionType != MentionType.NOMINAL) {
    // ORGs can be both plural and singular
    final boolean organization =
        nerString.equals("ORGANIZATION") || nerString.startsWith("ORG");
    number = organization ? Number.UNKNOWN : Number.SINGULAR;
  } else {
    final String tag = headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (!tag.startsWith("N")) {
      number = Number.UNKNOWN;
    } else if (tag.endsWith("S")) {
      number = Number.PLURAL;
    } else {
      number = Number.SINGULAR;
    }
  }

  // Dictionary fallback for non-pronouns. (An older constituent-pattern check for enumerations
  // used to live here; the LIST mention type replaces it.)
  if (!pronominal && number == Number.UNKNOWN) {
    if (dict.singularWords.contains(headString)) {
      number = Number.SINGULAR;
    } else if (dict.pluralWords.contains(headString)) {
      number = Number.PLURAL;
    }
  }
}
/**
 * Assigns {@code animacy}.
 *
 * Pronouns are resolved from the animate/inanimate pronoun lists only. Other mentions are
 * decided from the NER label: person labels are ANIMATE; location, money, number, percent,
 * date, time, facility, GPE, weapon and organization labels are INANIMATE; everything else
 * (including MISC and vehicle labels) is UNKNOWN and is then looked up in the animate/inanimate
 * word lists by head string.
 *
 * @param dict dictionaries providing the pronoun and word lists
 */
private void setAnimacy(Dictionaries dict) {
  if (mentionType == MentionType.PRONOMINAL) {
    if (dict.animatePronouns.contains(headString)) {
      animacy = Animacy.ANIMATE;
    } else if (dict.inanimatePronouns.contains(headString)) {
      animacy = Animacy.INANIMATE;
    } else {
      animacy = Animacy.UNKNOWN;
    }
    return;
  }

  if (nerString.equals("PERCENT")) {
    // Must be tested before the "PER" prefix check: "PERCENT" starts with "PER", so it used
    // to be classified as a person (ANIMATE) and the PERCENT branch could never be reached.
    animacy = Animacy.INANIMATE;
  } else if (nerString.equals("PERSON") || nerString.startsWith("PER")) {
    animacy = Animacy.ANIMATE;
  } else if (nerString.equals("LOCATION") || nerString.startsWith("LOC")
      || nerString.equals("MONEY")
      || nerString.equals("NUMBER")
      || nerString.equals("DATE")
      || nerString.equals("TIME")
      || nerString.startsWith("FAC")
      || nerString.startsWith("GPE")
      || nerString.startsWith("WEA")
      || nerString.startsWith("ORG")) {
    animacy = Animacy.INANIMATE;
  } else {
    // Covers "MISC", labels starting with "VEH", "O" and any label not listed above.
    animacy = Animacy.UNKNOWN;
  }

  // Better heuristics using DekangLin:
  if (animacy == Animacy.UNKNOWN) {
    if (dict.animateWords.contains(headString)) {
      animacy = Animacy.ANIMATE;
    } else if (dict.inanimateWords.contains(headString)) {
      animacy = Animacy.INANIMATE;
    }
  }
}
/** Suffixes (compared case-insensitively, without a trailing period) that knownSuffix() recognises. */
private static final String [] commonNESuffixes = {
"Corp", "Co", "Inc", "Ltd"
};
/**
 * Tells whether {@code s} is one of {@code commonNESuffixes}, ignoring case and one trailing
 * period (so "corp." and "Corp" both match).
 */
private static boolean knownSuffix(String s) {
  final String candidate = s.endsWith(".") ? s.substring(0, s.length() - 1) : s;
  for (int i = 0; i < commonNESuffixes.length; i++) {
    if (commonNESuffixes[i].equalsIgnoreCase(candidate)) {
      return true;
    }
  }
  return false;
}
/**
 * Sets {@code headString} to the lowercased text of {@code headWord}. When the head word has an
 * NER tag other than "O", the head is moved leftwards past tokens that are known suffixes
 * (see knownSuffix), updating {@code headString}, {@code headWord} and {@code headIndex} to the
 * first token that is not a suffix. Finally {@code headIndexedWord} is looked up in the basic
 * dependency graph by the head word's index.
 *
 * @throws RuntimeException if the span is non-empty and the head offset lies at or past its end
 */
private void setHeadString() {
this.headString = headWord.get(CoreAnnotations.TextAnnotation.class).toLowerCase();
String ner = headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class);
if (ner != null && !ner.equals("O")) {
// make sure that the head of a NE is not a known suffix, e.g., Corp.
// Offset of the head inside originalSpan.
int start = headIndex - startIndex;
if (originalSpan.size() > 0 && start >= originalSpan.size()) {
throw new RuntimeException("Invalid start index " + start + "=" + headIndex + "-" + startIndex
+ ": originalSpan=[" + StringUtils.joinWords(originalSpan, " ") + "], head=" + headWord);
}
// Walk left while the candidate is a suffix; if every token down to offset 0 is a suffix,
// the loop ends without changing the head.
while (start >= 0) {
// NOTE(review): with an empty originalSpan, head is "" here and the else branch below then
// calls originalSpan.get(start), which would throw — confirm empty spans cannot reach this.
String head = originalSpan.size() > 0 ? originalSpan.get(start).get(CoreAnnotations.TextAnnotation.class).toLowerCase() : "";
if (knownSuffix(head)) {
start --;
} else {
this.headString = head;
this.headWord = originalSpan.get(start);
this.headIndex = startIndex + start;
break;
}
}
}
// Uses the (possibly moved) head word.
this.headIndexedWord = basicDependency.getNodeByIndexSafe(headWord.index());
}
/**
 * Assigns {@code nerString} from the head word.
 *
 * When the head word has an entity-type annotation (ACE), its NER tag is used only if the head
 * has one and the entity type is "NAM". Without an entity-type annotation (MUC), the NER tag is
 * used whenever present. In all other cases the value is "O".
 */
private void setNERString() {
  final boolean aceStyle = headWord.containsKey(CoreAnnotations.EntityTypeAnnotation.class);
  final boolean hasNerTag = headWord.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class);

  final boolean useNerTag = aceStyle
      ? (hasNerTag && headWord.get(CoreAnnotations.EntityTypeAnnotation.class).equals("NAM"))
      : hasNerTag;

  this.nerString = useNerTag
      ? headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class)
      : "O";
}
/** @return true when both mentions reference the very same {@code sentenceWords} list object */
public boolean sameSentence(Mention m) {
  return this.sentenceWords == m.sentenceWords;
}
/**
 * Tells whether an NNP token appears in a token list, either as an identical word or, when
 * the token is longer than two characters, as a prefix of one of the words.
 * Tokens whose tag is not exactly "NNP" never match.
 */
private static boolean included(CoreLabel small, List<CoreLabel> big) {
  if (!small.tag().equals("NNP")) {
    return false;
  }
  for (CoreLabel candidate : big) {
    if (small.word().equals(candidate.word())) {
      return true;
    }
    if (small.word().length() > 2 && candidate.word().startsWith(small.word())) {
      return true;
    }
  }
  return false;
}
/**
 * Tells whether the heads of the two mentions agree. Named entities of the same (non-"O") type
 * need not match perfectly: it is enough that one head is included in the other span,
 * e.g., "George" -> "George Bush". Otherwise the head strings must be equal.
 */
public boolean headsAgree(Mention m) {
  final boolean sameEntityType = !nerString.equals("O")
      && !m.nerString.equals("O")
      && nerString.equals(m.nerString);
  if (sameEntityType
      && (included(headWord, m.originalSpan) || included(m.headWord, originalSpan))) {
    return true;
  }
  return headString.equals(m.headString);
}
/** Non-strict number agreement: an UNKNOWN number on either side counts as agreement. */
public boolean numbersAgree(Mention m) {
  return this.numbersAgree(m, false);
}
/**
 * @param strict when true the numbers must be identical; when false an UNKNOWN number on
 *        either side also counts as agreement
 */
private boolean numbersAgree(Mention m, boolean strict) {
  if (!strict && (number == Number.UNKNOWN || m.number == Number.UNKNOWN)) {
    return true;
  }
  return number == m.number;
}
/** Non-strict gender agreement: an UNKNOWN gender on either side counts as agreement. */
public boolean gendersAgree(Mention m) {
  return this.gendersAgree(m, false);
}
/**
 * @param strict when true the genders must be identical; when false an UNKNOWN gender on
 *        either side also counts as agreement
 */
public boolean gendersAgree(Mention m, boolean strict) {
  if (!strict && (gender == Gender.UNKNOWN || m.gender == Gender.UNKNOWN)) {
    return true;
  }
  return gender == m.gender;
}
/** Non-strict animacy agreement: an UNKNOWN animacy on either side counts as agreement. */
public boolean animaciesAgree(Mention m) {
  return this.animaciesAgree(m, false);
}
/**
 * @param strict when true the animacies must be identical; when false an UNKNOWN animacy on
 *        either side also counts as agreement
 */
public boolean animaciesAgree(Mention m, boolean strict) {
  if (!strict && (animacy == Animacy.UNKNOWN || m.animacy == Animacy.UNKNOWN)) {
    return true;
  }
  return animacy == m.animacy;
}
/** Non-strict entity-type agreement; see the three-argument overload. */
public boolean entityTypesAgree(Mention m, Dictionaries dict) {
  return this.entityTypesAgree(m, dict, false);
}
/**
 * Tells whether the entity types (NER labels) of the two mentions are compatible.
 *
 * In strict mode the labels must be equal. Otherwise, for a non-pronominal mention the labels
 * agree when either is "O" or both are equal. For a pronominal mention the other mention's
 * label selects a pronoun list that must contain this mention's head string; an "O" label
 * (and "MISC" in the non-ACE case) always agrees, and an unrecognised label never does.
 * Labels containing "-" are treated as ACE gold labels and matched by prefix.
 *
 * @param m the other mention
 * @param dict dictionaries providing the per-type pronoun lists
 * @param strict whether exact label equality is required
 */
public boolean entityTypesAgree(Mention m, Dictionaries dict, boolean strict) {
  if (strict) {
    return nerString.equals(m.nerString);
  }

  if (!isPronominal()) {
    return nerString.equals("O")
        || m.nerString.equals("O")
        || nerString.equals(m.nerString);
  }

  if (nerString.contains("-") || m.nerString.contains("-")) { // for ACE with gold NE
    final String other = m.nerString;
    if (other.equals("O")) {
      return true;
    }
    if (other.startsWith("ORG")) {
      return dict.organizationPronouns.contains(headString);
    }
    if (other.startsWith("PER")) {
      return dict.personPronouns.contains(headString);
    }
    if (other.startsWith("LOC")) {
      return dict.locationPronouns.contains(headString);
    }
    if (other.startsWith("GPE")) {
      return dict.GPEPronouns.contains(headString);
    }
    if (other.startsWith("VEH") || other.startsWith("FAC") || other.startsWith("WEA")) {
      return dict.facilityVehicleWeaponPronouns.contains(headString);
    }
    return false;
  }

  // ACE w/o gold NE or MUC
  switch (m.nerString) {
    case "O":
    case "MISC":
      return true;
    case "ORGANIZATION":
      return dict.organizationPronouns.contains(headString);
    case "PERSON":
      return dict.personPronouns.contains(headString);
    case "LOCATION":
      return dict.locationPronouns.contains(headString);
    case "DATE":
    case "TIME":
      return dict.dateTimePronouns.contains(headString);
    case "MONEY":
    case "PERCENT":
    case "NUMBER":
      return dict.moneyPercentNumberPronouns.contains(headString);
    default:
      return false;
  }
}
/**
 * Verifies that this mention lies within the given mention: both must be in the same sentence
 * and this mention's [startIndex, endIndex] range must not extend beyond the other's.
 */
public boolean includedIn(Mention m) {
  return m.sameSentence(this)
      && this.startIndex >= m.startIndex
      && this.endIndex <= m.endIndex;
}
/**
 * Detects whether the mention and a candidate antecedent agree on all attributes: animacy,
 * entity type, gender and number (each checked non-strictly, in that order).
 *
 * @param potentialAntecedent the candidate antecedent
 * @param dict dictionaries used by the entity-type check
 * @return true if every attribute agrees, else false
 */
public boolean attributesAgree(Mention potentialAntecedent, Dictionaries dict) {
  if (!this.animaciesAgree(potentialAntecedent)) {
    return false;
  }
  if (!this.entityTypesAgree(potentialAntecedent, dict)) {
    return false;
  }
  if (!this.gendersAgree(potentialAntecedent)) {
    return false;
  }
  return this.numbersAgree(potentialAntecedent);
}
/** Records {@code m} as an apposition of this mention, creating the set on first use. */
public void addApposition(Mention m) {
  if (appositions == null) {
    appositions = Generics.newHashSet();
  }
  appositions.add(m);
}
/** @return true when {@code m} has been recorded as an apposition of this mention */
public boolean isApposition(Mention m) {
  return appositions != null && appositions.contains(m);
}
/** Records {@code m} as a predicate nominative of this mention, creating the set on first use. */
public void addPredicateNominatives(Mention m) {
  if (predicateNominatives == null) {
    predicateNominatives = Generics.newHashSet();
  }
  predicateNominatives.add(m);
}
/** @return true when {@code m} has been recorded as a predicate nominative of this mention */
public boolean isPredicateNominatives(Mention m) {
  return predicateNominatives != null && predicateNominatives.contains(m);
}
/** Records {@code m} as a relative pronoun of this mention, creating the set on first use. */
public void addRelativePronoun(Mention m) {
  if (relativePronouns == null) {
    relativePronouns = Generics.newHashSet();
  }
  relativePronouns.add(m);
}
/**
 * Finds which mention appears first in a document. Mentions are ordered by sentence number,
 * then by start index, then by end index with the larger end index first. The remaining
 * comparisons (head index, mention-type representativeness, hash code) carry no meaning and
 * only serve as arbitrary tiebreakers.
 *
 * @return true when this mention is ordered before {@code m}
 */
public boolean appearEarlierThan(Mention m) {
  if (this.sentNum != m.sentNum) {
    return this.sentNum < m.sentNum;
  }
  if (this.startIndex != m.startIndex) {
    return this.startIndex < m.startIndex;
  }
  if (this.endIndex != m.endIndex) {
    // Same start: the mention reaching further comes first.
    return this.endIndex > m.endIndex;
  }
  // Meaningless, but arbitrary tiebreakers from here on.
  if (this.headIndex != m.headIndex) {
    return this.headIndex < m.headIndex;
  }
  if (this.mentionType != m.mentionType) {
    return this.mentionType.representativeness > m.mentionType.representativeness;
  }
  return this.hashCode() < m.hashCode();
}
/**
 * Returns the longest run of consecutive tokens tagged NNP* that ends at the head word and does
 * not start before {@code startIndex}, joined with single spaces.
 *
 * @return the joined token text, or "" when the head itself is not tagged NNP*
 */
public String longestNNPEndsWithHead() {
  StringBuilder name = new StringBuilder();
  for (int i = headIndex; i >= startIndex; i--) {
    CoreLabel token = sentenceWords.get(i);
    if (!token.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")) {
      break;
    }
    // Walking right-to-left, so each token is prepended.
    if (name.length() > 0) {
      name.insert(0, ' ');
    }
    name.insert(0, token.get(CoreAnnotations.TextAnnotation.class));
  }
  return name.toString();
}
/**
 * Returns the text of the lowest NP in {@code contextParseTree} that contains the head word,
 * joined with single spaces. If the walk up reaches ROOT without meeting an NP, the head leaf
 * alone is used. If the resulting text is not contained in the mention's span text, the text
 * of the head token from {@code sentenceWords} is returned instead.
 *
 * @return the NP text, the head word text, or "" when the walk up runs out of ancestors
 */
public String lowestNPIncludesHead (){
String ret = "";
Tree head = this.contextParseTree.getLeaves().get(this.headIndex);
Tree lowestNP = head;
String s;
// Climb from the head leaf one ancestor at a time until an NP or ROOT node is found.
while(true) {
if(lowestNP==null) return ret;
s = ((CoreLabel) lowestNP.label()).get(CoreAnnotations.ValueAnnotation.class);
if(s.equals("NP") || s.equals("ROOT")) break;
lowestNP = lowestNP.ancestor(1, this.contextParseTree);
}
// No NP above the head: fall back to the head leaf itself.
if (s.equals("ROOT")) lowestNP = head;
for (Tree t : lowestNP.getLeaves()){
if (!ret.equals("")) ret = ret + " ";
ret = ret + ((CoreLabel) t.label()).get(CoreAnnotations.TextAnnotation.class);
}
// The NP reaches outside this mention's text: use just the head token.
if(!this.spanToString().contains(ret)) return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
return ret;
}
/**
 * Strips one leading English article ("a", "an" or "the", lowercase or capitalised, followed by
 * a space) from a string.
 *
 * @param str the text to process; null means the mention's own span text
 * @return the text without the leading article, or the text unchanged when there is none
 */
public String stringWithoutArticle(String str) {
  final String text = (str == null) ? this.spanToString() : str;
  final String[] articlePrefixes = {"a ", "A ", "an ", "An ", "the ", "The "};
  for (String prefix : articlePrefixes) {
    if (text.startsWith(prefix)) {
      return text.substring(prefix.length());
    }
  }
  return text;
}
/**
 * Builds the list of search terms for this mention from four candidates, in this order:
 * the span without the phrase after the head, the lowest NP including the head, the longest
 * NNP sequence ending with the head (each without a leading article), and the head string.
 *
 * In every candidate the characters {@code " ( ) ! : + - ~ * [ ] ^} are escaped with a
 * backslash. Empty and duplicate terms are dropped, and a term equal to the head string is
 * dropped when the NNP candidate is non-empty.
 *
 * @return the escaped, de-duplicated search terms
 */
public List<String> preprocessSearchTerm() {
  final String[] candidates = {
      this.stringWithoutArticle(this.removePhraseAfterHead()),
      this.stringWithoutArticle(this.lowestNPIncludesHead()),
      this.stringWithoutArticle(this.longestNNPEndsWithHead()),
      this.headString
  };
  final char[] specials = "\"()!:+-~*[]^".toCharArray();

  List<String> searchTerms = new ArrayList<>();
  for (String candidate : candidates) {
    String term = candidate;
    for (char special : specials) {
      term = term.replace(String.valueOf(special), "\\" + special);
    }
    if (term.equals("") || searchTerms.contains(term)) {
      continue;
    }
    // The bare head string is redundant when an NNP sequence ending with the head exists.
    if (term.equals(candidates[3]) && !candidates[2].equals("")) {
      continue;
    }
    searchTerms.add(term);
  }
  return searchTerms;
}
/**
 * Joins the terms into one query string, separated by single spaces, and trims leading and
 * trailing whitespace from the result.
 *
 * @param terms the terms to join
 * @return the joined query text; "" for an empty list
 */
public static String buildQueryText(List<String> terms) {
  StringBuilder query = new StringBuilder();
  for (String term : terms) {
    query.append(term).append(" ");
  }
  return query.toString().trim();
}
/**
 * Removes any clause after the head word.
 *
 * The first comma (by POS tag ",") and the first WH-word (POS tag starting with "W") in the
 * span are located. If there is a comma, the text before it is returned when the head precedes
 * the comma. If there is no comma but a WH-word, the text before the WH-word is returned when
 * the head precedes it. If there is neither, the whole span text is returned.
 *
 * @return the shortened span text, or "" when a cut point exists but the head does not precede it
 */
public String removePhraseAfterHead() {
  int posComma = -1;
  int posWH = -1;
  int offset = 0;
  for (CoreLabel token : this.originalSpan) {
    final String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (posComma == -1 && tag.equals(",")) {
      posComma = this.startIndex + offset;
    }
    if (posWH == -1 && tag.startsWith("W")) {
      posWH = this.startIndex + offset;
    }
    offset++;
  }

  // Pick the cut point; a comma takes precedence over a WH-word.
  final int cutoff;
  if (posComma != -1) {
    if (this.headIndex >= posComma) {
      return "";
    }
    cutoff = posComma;
  } else if (posWH != -1) {
    if (this.headIndex >= posWH) {
      return "";
    }
    cutoff = posWH;
  } else {
    return this.spanToString();
  }

  StringBuilder kept = new StringBuilder();
  for (int i = 0; i < cutoff - this.startIndex; i++) {
    if (i > 0) {
      kept.append(" ");
    }
    kept.append(this.originalSpan.get(i).get(CoreAnnotations.TextAnnotation.class));
  }
  return kept.toString();
}
/**
 * Returns the part of {@code text} that precedes its first opening parenthesis,
 * with surrounding whitespace trimmed.
 *
 * Implemented with {@code indexOf}/{@code substring} rather than running the
 * same regex split twice; the result is the same for every input.
 *
 * @param text the text to truncate; must not be {@code null}
 * @return the trimmed text before the first '(' (the whole trimmed text if there is none);
 *         the empty string if the text starts with '('
 */
public static String removeParenthesis(String text) {
  final int open = text.indexOf('(');
  final String head = (open < 0) ? text : text.substring(0, open);
  return head.trim();
}
/**
 * Tests whether the mention is of the 'the + commonNoun' form: a NOMINAL
 * mention whose span starts with "the " (case-insensitively) and splits
 * into exactly two space-separated pieces.
 */
protected boolean isTheCommonNoun() {
  if (this.mentionType != MentionType.NOMINAL) {
    return false;
  }
  final String span = this.spanToString();
  return span.toLowerCase().startsWith("the ") && span.split(" ").length == 2;
}
/**
 * Looks up a verb-tagged node that governs the head of {@code m} in its enhanced
 * dependency graph, either directly or via an intermediate node that has a
 * verb-tagged ancestor.
 *
 * @param m the mention whose head is searched for (the pattern uses headIndex + 1 as idx)
 * @return the first match as (verb node, relation string), or an empty pair when the
 *         graph has no roots or nothing matches
 */
private static Pair<IndexedWord, String> findDependentVerb(Mention m) {
  if (m.enhancedDependency.getRoots().isEmpty()) {
    return new Pair<>();
  }
  // would be nice to condense this pattern, but sadly =reln
  // always uses the last relation in the sequence, not the first
  final String headQuery = "{idx:" + (m.headIndex+1) + "} [ <=reln {tag:/^V.*/}=verb | <=reln ({} << {tag:/^V.*/}=verb) ]";
  SemgrexMatcher matcher = SemgrexPattern.compile(headQuery).matcher(m.enhancedDependency);
  // Only the first match is ever used.
  if (!matcher.find()) {
    return new Pair<>();
  }
  return Pair.makePair(matcher.getNode("verb"), matcher.getRelnString("reln"));
}
/**
 * Returns true if this mention is contained inside m. That is, it is a subspan of the same sentence.
 *
 * @param m the candidate enclosing mention
 * @return true iff both mentions share a sentence number and this mention's
 *         [startIndex, endIndex] lies within m's
 */
public boolean insideIn(Mention m){
  if (this.sentNum != m.sentNum) {
    return false;
  }
  if (this.startIndex < m.startIndex) {
    return false;
  }
  return this.endIndex <= m.endIndex;
}
/**
 * Decides whether this mention is a better representative than {@code m}.
 *
 * The criteria are applied in order, and the first one that differs decides:
 * mention-type representativeness (higher wins); NER tag (having one beats
 * having none, and a tag other than "O"/"MISC" beats "O" and then "MISC");
 * distance from span start to head (larger wins); sentence number (earlier
 * wins); head index (earlier wins); span length (for spans of at most 5
 * tokens the longer wins, otherwise the shorter wins).
 *
 * @param m the mention to compare against; {@code null} always loses
 * @return true if this mention is more representative than {@code m}
 * @throws IllegalStateException if the two mentions tie on every criterion
 *         (this includes, but is not limited to, comparing a mention with itself)
 */
public boolean moreRepresentativeThan(Mention m){
if(m==null) return true;
if (mentionType.representativeness > m.mentionType.representativeness) { return true; }
else if (m.mentionType.representativeness > mentionType.representativeness) { return false; }
else {
// pick mention with better NER
if (nerString != null && m.nerString == null) return true;
if (nerString == null && m.nerString != null) return false;
// Tags differ: "O" is the weakest tag, "MISC" the next weakest.
if (nerString != null && !nerString.equals(m.nerString)) {
if ("O".equals(m.nerString)) return true;
if ("O".equals(nerString)) return false;
if ("MISC".equals(m.nerString)) return true;
if ("MISC".equals(nerString)) return false;
}
// Ensure that both NER tags are neither MISC nor O, or are both not existent
assert nerString == null || nerString.equals(m.nerString) || (!nerString.equals("O") && !nerString.equals("MISC") && !m.nerString.equals("O") && !m.nerString.equals("MISC"));
// Return larger headIndex - startIndex
if (headIndex - startIndex > m.headIndex - m.startIndex) { return true; }
else if (headIndex - startIndex < m.headIndex - m.startIndex) { return false; }
// Return earlier sentence number
else if (sentNum < m.sentNum) { return true; }
else if (sentNum > m.sentNum) { return false; }
// Return earlier head index
else if (headIndex < m.headIndex) { return true; }
else if (headIndex > m.headIndex) { return false; }
// If the mentions are short, take the longer one
// (only this mention's span size is tested against the threshold of 5)
else if (originalSpan.size() <= 5 && originalSpan.size() > m.originalSpan.size()) { return true; }
else if (originalSpan.size() <= 5 && originalSpan.size() < m.originalSpan.size()) { return false; }
// If the mentions are long, take the shorter one (we're getting into the realm of nonsense by here)
else if (originalSpan.size() < m.originalSpan.size()) { return true; }
else if (originalSpan.size() > m.originalSpan.size()) { return false; }
else {
// Every criterion tied, so no ordering can be given.
throw new IllegalStateException("Comparing a mention with itself for representativeness");
}
}
}
/**
 * Returns filtered premodifiers (no determiners or numerals).
 *
 * A child of the head in the enhanced dependency graph is kept when it precedes
 * the head word, is not tagged DT or WRB, and its relation is none of
 * *det, nummod, acl*, advcl*, punct. Each kept child contributes its sorted
 * descendant list as one phrase.
 *
 * @return one sorted phrase per kept child; empty when there is no head indexed word
 */
public ArrayList<ArrayList<IndexedWord>> getPremodifiers(){
  ArrayList<ArrayList<IndexedWord>> phrases = new ArrayList<>();
  if (headIndexedWord == null) {
    return phrases;
  }
  for (Pair<GrammaticalRelation,IndexedWord> edge : enhancedDependency.childPairs(headIndexedWord)) {
    final String relnName = edge.first().getShortName();
    final IndexedWord dependent = edge.second();
    if (dependent.index() >= headWord.index()) {
      continue;
    }
    final String tag = dependent.tag();
    if (tag.equals("DT") || tag.equals("WRB")) {
      continue;
    }
    if (relnName.endsWith("det") || relnName.equals("nummod")
        || relnName.startsWith("acl") || relnName.startsWith("advcl")
        || relnName.equals("punct")) {
      continue;
    }
    ArrayList<IndexedWord> subtree = new ArrayList<>(enhancedDependency.descendants(dependent));
    Collections.sort(subtree);
    phrases.add(subtree);
  }
  return phrases;
}
/**
 * Returns filtered postmodifiers (no relative, -ed or -ing clauses).
 *
 * A child of the head in the enhanced dependency graph is kept when it follows
 * the head word, its relation is none of *det, nummod, acl*, advcl*, punct,
 * and it is not a possessive clitic (a "case" child tagged POS whose
 * descendant set has exactly one element). Each kept child contributes its
 * sorted descendant list as one phrase.
 *
 * @return one sorted phrase per kept child; empty when there is no head indexed word
 */
public ArrayList<ArrayList<IndexedWord>> getPostmodifiers(){
  ArrayList<ArrayList<IndexedWord>> phrases = new ArrayList<>();
  if (headIndexedWord == null) {
    return phrases;
  }
  for (Pair<GrammaticalRelation,IndexedWord> edge : enhancedDependency.childPairs(headIndexedWord)) {
    final String relnName = edge.first().getShortName();
    final IndexedWord dependent = edge.second();
    if (dependent.index() <= headWord.index()) {
      continue;
    }
    if (relnName.endsWith("det") || relnName.equals("nummod")
        || relnName.startsWith("acl") || relnName.startsWith("advcl")
        || relnName.equals("punct")) {
      continue;
    }
    //possessive clitic
    final boolean possessiveClitic = relnName.equals("case")
        && enhancedDependency.descendants(dependent).size() == 1
        && dependent.tag().equals("POS");
    if (possessiveClitic) {
      continue;
    }
    ArrayList<IndexedWord> subtree = new ArrayList<>(enhancedDependency.descendants(dependent));
    Collections.sort(subtree);
    phrases.add(subtree);
  }
  return phrases;
}
/**
 * Builds four pattern strings of increasing scope for this mention.
 *
 * @return a 4-element array: [0] the head lemma; [1] the pattern of the last
 *         premodifier phrase plus the head; [2] the pattern of all premodifier
 *         phrases plus the head; [3] the full pattern from {@link #getPattern()}.
 *         Slots 1 and 2 fall back to the head lemma when there are no premodifiers.
 */
public String[] getSplitPattern(){
  final ArrayList<ArrayList<IndexedWord>> premods = getPremodifiers();
  final String[] parts = new String[4];
  parts[0] = headWord.lemma();

  if (premods.isEmpty()) {
    parts[1] = headWord.lemma();
    parts[2] = headWord.lemma();
  } else {
    ArrayList<AbstractCoreLabel> lastWithHead = Generics.newArrayList();
    lastWithHead.addAll(premods.get(premods.size() - 1));
    lastWithHead.add(headWord);
    parts[1] = getPattern(lastWithHead);

    if (premods.size() == 1) {
      // A single premodifier: the "last" and "all" variants coincide.
      parts[2] = getPattern(lastWithHead);
    } else {
      ArrayList<AbstractCoreLabel> allWithHead = Generics.newArrayList();
      for (ArrayList<IndexedWord> phrase : premods) {
        allWithHead.addAll(phrase);
      }
      allWithHead.add(headWord);
      parts[2] = getPattern(allWithHead);
    }
  }

  parts[3] = getPattern();
  return parts;
}
/**
 * Builds the pattern string for the whole mention: all premodifier phrases,
 * then the head word, then all postmodifier phrases.
 *
 * @return the pattern produced by {@link #getPattern(List)} for that token sequence
 */
public String getPattern(){
  ArrayList<AbstractCoreLabel> tokens = Generics.newArrayList();
  getPremodifiers().forEach(tokens::addAll);
  tokens.add(headWord);
  getPostmodifiers().forEach(tokens::addAll);
  return getPattern(tokens);
}
/**
 * Turns a token sequence into a pattern string in which named-entity tokens
 * are abstracted to their type.
 *
 * Per token: the head word contributes its lemma; an "and" or punctuation token
 * whose two neighbours in {@code pTokens} carry equal NER tags contributes
 * nothing; a token directly before the head whose NER tag equals this mention's
 * {@code nerString}, and any token tagged "O", contributes its lemma; any other
 * token contributes {@code <TYPE>} once per run of the same NER type.
 *
 * @param pTokens the tokens to render, in order
 * @return the pieces joined by {@code StringUtils.join}
 */
public String getPattern(List<AbstractCoreLabel> pTokens){
  ArrayList<String> pieces = new ArrayList<>();
  // NER type of the placeholder most recently emitted; "" once a lemma has been emitted since.
  String openType = "";
  for (AbstractCoreLabel token : pTokens) {
    if (token.index() == headWord.index()) {
      pieces.add(token.lemma());
      openType = "";
      continue;
    }

    if (token.lemma().equals("and") || StringUtils.isPunct(token.lemma())) {
      final int pos = pTokens.indexOf(token);
      if (pTokens.size() > pos + 1 && pos > 0
          && pTokens.get(pos + 1).ner().equals(pTokens.get(pos - 1).ner())) {
        // Connector between same-typed neighbours: emit nothing and keep openType as is.
        continue;
      }
    }

    final boolean entityPartBeforeHead =
        token.index() == headWord.index() - 1 && token.ner().equals(nerString);
    if (entityPartBeforeHead || token.ner().equals("O")) {
      pieces.add(token.lemma());
      openType = "";
    } else if (!token.ner().equals(openType)) {
      openType = token.ner();
      pieces.add("<" + openType + ">");
    }
  }
  return StringUtils.join(pieces);
}
/**
 * Tests whether the head has a child attached by a relation whose short name is "cc".
 *
 * @return true if such a child exists; false otherwise or when there is no head indexed word
 */
public boolean isCoordinated(){
  if (headIndexedWord == null) {
    return false;
  }
  return enhancedDependency.childPairs(headIndexedWord).stream()
      .anyMatch(edge -> edge.first().getShortName().equals("cc"));
}
/**
 * Collects the distinct named-entity strings found in a token list.
 *
 * Tokens whose NER tag is not "O" are grouped into spans; a new span starts
 * whenever the tag changes or the previous entity token is not the immediately
 * preceding token. Each span is joined with single spaces and duplicates are
 * removed through a hash set, so the order of the result follows that set's
 * iteration order.
 *
 * @param words the tokens to scan
 * @return the distinct entity strings
 */
private static List<String> getContextHelper(List<? extends AbstractCoreLabel> words) {
  List<List<AbstractCoreLabel>> entitySpans = Generics.newArrayList();
  List<AbstractCoreLabel> currentSpan = Generics.newArrayList();
  String lastType = "";
  int lastPos = -1;
  int pos = -1;
  for (AbstractCoreLabel word : words) {
    pos++;
    if (word.ner().equals("O")) {
      continue;
    }
    final boolean continuesSpan = word.ner().equals(lastType) && lastPos == pos - 1;
    if (!continuesSpan) {
      currentSpan = Generics.newArrayList();
      entitySpans.add(currentSpan);
    }
    currentSpan.add(word);
    lastType = word.ner();
    lastPos = pos;
  }

  Set<String> distinct = Generics.newHashSet();
  for (List<AbstractCoreLabel> span : entitySpans) {
    distinct.add(StringUtils.joinWords(span, " "));
  }
  return new ArrayList<>(distinct);
}
/**
 * Returns the named-entity context of this mention's sentence, i.e. the
 * result of {@code getContextHelper} applied to {@code sentenceWords}.
 */
public List<String> getContext() {
return getContextHelper(sentenceWords);
}
/**
 * Returns the named-entity strings found inside this mention's premodifier phrases.
 *
 * Each premodifier phrase is processed separately, so a string occurring in
 * several phrases appears once per phrase.
 *
 * @return the concatenated per-phrase entity strings, in premodifier order
 */
public List<String> getPremodifierContext() {
  List<String> contexts = new ArrayList<>();
  getPremodifiers().forEach(phrase -> contexts.addAll(getContextHelper(phrase)));
  return contexts;
}
/**
 * Check relative pronouns.
 *
 * @param m the mention to look up
 * @return true if {@code relativePronouns} is set and contains {@code m}
 */
public boolean isRelativePronoun(Mention m) {
  if (relativePronouns == null) {
    return false;
  }
  return relativePronouns.contains(m);
}
/**
 * Tests whether this mention stands in a role-appositive relation to {@code m}.
 *
 * All of the following must hold: this mention is not a pronoun; both NER tags
 * start with "PER" or equal "O"; both mentions are in the same sentence and
 * m's span starts with this span; m's span contains neither an apostrophe nor
 * " and "; animacy and number agree, this mention is not inanimate and neither
 * gender is neutral; and neither normalized span is a demonym.
 *
 * @param m the candidate enclosing mention
 * @param dict dictionaries supplying the pronoun and demonym sets
 * @return true if every condition above holds
 */
public boolean isRoleAppositive(Mention m, Dictionaries dict) {
  final String span = this.spanToString();
  final String spanLower = this.lowercaseNormalizedSpanString();

  if (this.isPronominal() || dict.allPronouns.contains(spanLower)) {
    return false;
  }
  if (!(m.nerString.startsWith("PER") || m.nerString.equals("O"))) {
    return false;
  }
  if (!(this.nerString.startsWith("PER") || this.nerString.equals("O"))) {
    return false;
  }
  if (!sameSentence(m)) {
    return false;
  }
  final String otherSpan = m.spanToString();
  if (!otherSpan.startsWith(span)) {
    return false;
  }
  if (otherSpan.contains("'") || otherSpan.contains(" and ")) {
    return false;
  }

  final boolean attributesCompatible = animaciesAgree(m)
      && this.animacy != Animacy.INANIMATE
      && this.gender != Gender.NEUTRAL
      && m.gender != Gender.NEUTRAL
      && this.numbersAgree(m);
  if (!attributesCompatible) {
    return false;
  }

  return !(dict.demonymSet.contains(spanLower)
      || dict.demonymSet.contains(m.lowercaseNormalizedSpanString()));
}
/**
 * Tests whether this mention and {@code m} are related as place and demonym,
 * or name the same American state.
 *
 * @param m the other mention
 * @param dict dictionaries providing the state-name and demonym lookups
 * @return true if both cased spans map to the same canonical American state
 *         name, or if (lower-cased, leading "the " removed) either span is
 *         listed among the demonyms of the other
 */
public boolean isDemonym(Mention m, Dictionaries dict) {
  final String thisCased = this.spanToString();
  final String otherCased = m.spanToString();

  // The US state matching part (only) is done cased
  final String thisState = dict.lookupCanonicalAmericanStateName(thisCased);
  final String otherState = dict.lookupCanonicalAmericanStateName(otherCased);
  if (thisState != null && thisState.equals(otherState)) {
    return true;
  }

  // The rest is done uncased
  String thisKey = thisCased.toLowerCase(Locale.ENGLISH);
  String otherKey = otherCased.toLowerCase(Locale.ENGLISH);
  thisKey = thisKey.startsWith("the ") ? thisKey.substring(4) : thisKey;
  otherKey = otherKey.startsWith("the ") ? otherKey.substring(4) : otherKey;

  final Set<String> thisDemonyms = dict.getDemonyms(thisKey);
  final Set<String> otherDemonyms = dict.getDemonyms(otherKey);
  return thisDemonyms.contains(otherKey) || otherDemonyms.contains(thisKey);
}
/**
 * Classifies where the head word falls within its sentence.
 *
 * @return "first" for index 0, "last" for the final index, otherwise "begin",
 *         "middle" or "end" for the first, second or last third of the sentence
 *         (integer division); {@code null} if the head index matches none of these ranges
 */
public String getPosition() {
  final int size = sentenceWords.size();
  final int third = size/3;
  final int twoThirds = 2 * size/3;

  if (headIndex == 0) {
    return "first";
  }
  if (headIndex == size - 1) {
    return "last";
  }
  if (headIndex > 0 && headIndex < third) {
    return "begin";
  }
  if (headIndex >= third && headIndex < twoThirds) {
    return "middle";
  }
  if (headIndex >= twoThirds && headIndex < size - 1) {
    return "end";
  }
  return null;
}
// Lazily filled cache for getHeadParent().
private IndexedWord headParent;

/**
 * Returns the parent of the head in the enhanced dependency graph, looking it
 * up on first use. A {@code null} result is not cached, so the lookup is
 * repeated on the next call.
 */
private IndexedWord getHeadParent() {
  if (headParent == null) {
    headParent = enhancedDependency.getParent(headIndexedWord);
  }
  return headParent;
}
// Lazily filled cache for getHeadChildren().
private Collection<IndexedWord> headChildren;

/**
 * Returns the children of the head in the enhanced dependency graph, looking
 * them up on first use. A {@code null} result is not cached.
 */
private Collection<IndexedWord> getHeadChildren() {
  if (headChildren == null) {
    headChildren = enhancedDependency.getChildList(headIndexedWord);
  }
  return headChildren;
}
// Lazily filled cache for getHeadSiblings().
private Collection<IndexedWord> headSiblings;

/**
 * Returns the siblings of the head in the enhanced dependency graph, looking
 * them up on first use. A {@code null} result is not cached.
 */
private Collection<IndexedWord> getHeadSiblings() {
  if (headSiblings == null) {
    headSiblings = enhancedDependency.getSiblings(headIndexedWord);
  }
  return headSiblings;
}
// Lazily filled cache for getHeadPathToRoot().
private List<IndexedWord> headPathToRoot;

/**
 * Returns the path from the head to the root of the enhanced dependency graph,
 * looking it up on first use. A {@code null} result is not cached.
 */
private List<IndexedWord> getHeadPathToRoot() {
  if (headPathToRoot == null) {
    headPathToRoot = enhancedDependency.getPathToRoot(headIndexedWord);
  }
  return headPathToRoot;
}
/**
 * Maps the dependency relation between the head and its parent to a coarse category.
 *
 * @return "root" if the head is the first root of the graph; otherwise "adjunct",
 *         "subject", "verbArg" or "nounArg" according to the relation to the
 *         parent; {@code null} if there is no head indexed word, the graph has
 *         no roots, the parent is not a vertex of the graph, or the relation
 *         falls in none of the categories
 */
public String getRelation() {
  if (headIndexedWord == null || enhancedDependency.getRoots().isEmpty()) {
    return null;
  }
  // root relation
  if (enhancedDependency.getFirstRoot().equals(headIndexedWord)) {
    return "root";
  }
  final IndexedWord parent = getHeadParent();
  if (!enhancedDependency.containsVertex(parent)) {
    return null;
  }
  final GrammaticalRelation relation = enhancedDependency.reln(parent, headIndexedWord);

  // adjunct relations
  final boolean nmodWithInChild = relation.toString().startsWith("nmod")
      && getHeadChildren().stream().anyMatch(c -> c.tag().equals("IN"));
  if (nmodWithInChild
      || relation == UniversalEnglishGrammaticalRelations.TEMPORAL_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.ADVERBIAL_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER) {
    return "adjunct";
  }

  // subject relations (active and passive alike)
  if (relation == UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT
      || relation == UniversalEnglishGrammaticalRelations.CLAUSAL_SUBJECT
      || relation == UniversalEnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT
      || relation == UniversalEnglishGrammaticalRelations.CLAUSAL_PASSIVE_SUBJECT) {
    return "subject";
  }

  // verbal argument relations
  if (relation == UniversalEnglishGrammaticalRelations.CLAUSAL_COMPLEMENT
      || relation == UniversalEnglishGrammaticalRelations.XCLAUSAL_COMPLEMENT
      || relation == UniversalEnglishGrammaticalRelations.AGENT
      || relation == UniversalEnglishGrammaticalRelations.DIRECT_OBJECT
      || relation == UniversalEnglishGrammaticalRelations.INDIRECT_OBJECT) {
    return "verbArg";
  }

  // noun argument relations
  if (relation == UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.ADJECTIVAL_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.APPOSITIONAL_MODIFIER
      || relation == UniversalEnglishGrammaticalRelations.POSSESSION_MODIFIER) {
    return "nounArg";
  }

  return null;
}
/**
 * Counts the modifiers attached to the head in the enhanced dependency graph.
 *
 * A child counts when its relation is an adjectival modifier, a relative
 * clause modifier, or has a name starting with "prep_"; a possession modifier
 * counts only when its lemma is not in {@code dict.determiners}.
 *
 * @param dict dictionaries supplying the determiner list
 * @return the number of counted children; 0 when there is no head indexed word
 */
public int getModifiers(Dictionaries dict){
  if (headIndexedWord == null) {
    return 0;
  }
  int total = 0;
  for (Pair<GrammaticalRelation, IndexedWord> edge : enhancedDependency.childPairs(headIndexedWord)) {
    final GrammaticalRelation reln = edge.first;
    final IndexedWord dependent = edge.second;

    final boolean plainModifier = reln == UniversalEnglishGrammaticalRelations.ADJECTIVAL_MODIFIER
        || reln == UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER
        || reln.toString().startsWith("prep_");
    if (plainModifier) {
      total++;
    }

    // add possessive if not a personal determiner
    final boolean countedPossessive = reln == UniversalEnglishGrammaticalRelations.POSSESSION_MODIFIER
        && !dict.determiners.contains(dependent.lemma());
    if (countedPossessive) {
      total++;
    }
  }
  return total;
}
/**
 * Classifies the mention as "definite", "quantified" or "indefinite".
 *
 * A mention whose NER tag is not "O" is definite. Otherwise, if the head has
 * determiner children, the first one whose lemma is a known determiner
 * ("definite") or quantifier ("quantified") decides, and "indefinite" results
 * if none matches. Without determiner children, a possession modifier makes
 * the mention definite and a numeric modifier makes it quantified.
 *
 * @param dict dictionaries supplying the determiner and quantifier lists
 * @return the category, or {@code null} when there is no head indexed word
 */
public String getQuantification(Dictionaries dict){
  if (headIndexedWord == null) {
    return null;
  }
  if (!nerString.equals("O")) {
    return "definite";
  }

  final Set<IndexedWord> determiners = enhancedDependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.DETERMINER);
  final Set<IndexedWord> possessors = enhancedDependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.POSSESSION_MODIFIER);

  if (!determiners.isEmpty()) {
    for (IndexedWord determiner : determiners) {
      final String lemma = determiner.lemma();
      if (dict.determiners.contains(lemma)) {
        return "definite";
      }
      if (dict.quantifiers2.contains(lemma)) {
        return "quantified";
      }
    }
    // Determiners present but none recognised: possessors/numerals are not consulted.
    return "indefinite";
  }

  if (!possessors.isEmpty()) {
    return "definite";
  }

  final Set<IndexedWord> numerals = enhancedDependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER);
  return numerals.isEmpty() ? "indefinite" : "quantified";
}
/**
 * Detects whether the head is negated.
 *
 * Checked in order: a child whose lemma is a negation word; a sibling whose
 * lemma is a negation word, provided the head is not attached as a nominal
 * subject; and the relation to the first listed parent being one of
 * {@code dict.neg_relations}.
 *
 * @param dict dictionaries supplying the negation words and relations
 * @return 1 if negation is found, 0 otherwise or when there is no head indexed word
 */
public int getNegation(Dictionaries dict) {
  if (headIndexedWord == null) {
    return 0;
  }

  // direct negation in a child
  for (IndexedWord child : enhancedDependency.getChildren(headIndexedWord)) {
    if (dict.negations.contains(child.lemma())) {
      return 1;
    }
  }

  // or has a sibling
  for (IndexedWord sibling : getHeadSiblings()) {
    final boolean negationWord = dict.negations.contains(sibling.lemma());
    if (negationWord && !enhancedDependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT)) {
      return 1;
    }
  }

  // check the parent
  final List<Pair<GrammaticalRelation,IndexedWord>> parents = enhancedDependency.parentPairs(headIndexedWord);
  if (parents.isEmpty()) {
    return 0;
  }
  // check negative prepositions
  final GrammaticalRelation toParent = parents.get(0).first;
  return dict.neg_relations.contains(toParent.toString()) ? 1 : 0;
}
/**
 * Detects whether the head is in the scope of a modal.
 *
 * Checked in order: a child whose lemma is a modal; the parent's lemma; the
 * parent's auxiliary child, unless the head is attached as a nominal subject;
 * and any word on the path from the head to the root.
 *
 * @param dict dictionaries supplying the modal list
 * @return 1 if a modal is found, 0 otherwise or when there is no head indexed word
 */
public int getModal(Dictionaries dict) {
  if (headIndexedWord == null) {
    return 0;
  }

  // direct modal in a child
  final Collection<IndexedWord> children = enhancedDependency.getChildren(headIndexedWord);
  if (children.stream().anyMatch(child -> dict.modals.contains(child.lemma()))) {
    return 1;
  }

  // check the parent
  final IndexedWord parent = getHeadParent();
  if (parent != null) {
    if (dict.modals.contains(parent.lemma())) {
      return 1;
    }
    // check the children of the parent (that is needed for modal auxiliaries)
    final IndexedWord aux = enhancedDependency.getChildWithReln(parent, UniversalEnglishGrammaticalRelations.AUX_MODIFIER);
    if (!enhancedDependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT)
        && aux != null
        && dict.modals.contains(aux.lemma())) {
      return 1;
    }
  }

  // look at the path to root
  final List<IndexedWord> path = getHeadPathToRoot();
  if (path == null) {
    return 0;
  }
  return path.stream().anyMatch(word -> dict.modals.contains(word.lemma())) ? 1 : 0;
}
/**
 * Detects whether the head is embedded under a reporting verb or noun.
 *
 * First looks for a sibling that is a report verb attached as an adverbial
 * clause and marked with "as". Then walks the path from the head to the root:
 * a word counts if its lemma is a report verb or noun, unless the previously
 * visited node (initially the head itself) is attached as a nominal subject.
 *
 * @param dict dictionaries supplying the report verb and noun lists
 * @return 1 if report embedding is found, 0 otherwise or when there is no head indexed word
 */
public int getReportEmbedding(Dictionaries dict) {
  if (headIndexedWord == null) {
    return 0;
  }

  // check adverbial clause with marker "as"
  for (IndexedWord sibling : getHeadSiblings()) {
    if (!dict.reportVerb.contains(sibling.lemma())) {
      continue;
    }
    if (!enhancedDependency.hasParentWithReln(sibling, UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER)) {
      continue;
    }
    final IndexedWord marker = enhancedDependency.getChildWithReln(sibling, UniversalEnglishGrammaticalRelations.MARKER);
    if (marker != null && marker.lemma().equals("as")) {
      return 1;
    }
  }

  // look at the path to root
  final List<IndexedWord> path = getHeadPathToRoot();
  if (path == null) {
    return 0;
  }
  // if the node itself is a subject, we will not take into account its parent in the path
  boolean previousIsSubject = enhancedDependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT);
  for (IndexedWord ancestor : path) {
    final String lemma = ancestor.lemma();
    if (!previousIsSubject && (dict.reportVerb.contains(lemma) || dict.reportNoun.contains(lemma))) {
      return 1;
    }
    // check how to put isSubject
    previousIsSubject = enhancedDependency.hasParentWithReln(ancestor, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT);
  }
  return 0;
}
/**
 * Detects whether the head takes part in a coordination, i.e. whether any
 * relation to one of its children, or any relation returned by
 * {@code relns} for the head, has a name starting with "conj:".
 *
 * @return 1 if such a relation exists, 0 otherwise or when there is no head indexed word
 */
public int getCoordination() {
  if (headIndexedWord == null) {
    return 0;
  }
  final Set<GrammaticalRelation> toChildren = enhancedDependency.childRelns(headIndexedWord);
  if (toChildren.stream().anyMatch(rel -> rel.toString().startsWith("conj:"))) {
    return 1;
  }
  final Set<GrammaticalRelation> toParents = enhancedDependency.relns(headIndexedWord);
  if (toParents.stream().anyMatch(rel -> rel.toString().startsWith("conj:"))) {
    return 1;
  }
  return 0;
}
/**
 * Field-by-field equality: two mentions are equal when they are of the same
 * class and agree on every attribute, index, identifier, flag and structure
 * compared below.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj == null || getClass() != obj.getClass()) {
    return false;
  }
  final Mention rhs = (Mention) obj;
  return Objects.equals(mentionType, rhs.mentionType)
      && Objects.equals(number, rhs.number)
      && Objects.equals(gender, rhs.gender)
      && Objects.equals(animacy, rhs.animacy)
      && Objects.equals(person, rhs.person)
      && Objects.equals(headString, rhs.headString)
      && Objects.equals(nerString, rhs.nerString)
      // positions and identifiers
      && startIndex == rhs.startIndex
      && endIndex == rhs.endIndex
      && headIndex == rhs.headIndex
      && mentionID == rhs.mentionID
      && originalRef == rhs.originalRef
      && Objects.equals(headIndexedWord, rhs.headIndexedWord)
      && Objects.equals(dependingVerb, rhs.dependingVerb)
      && Objects.equals(headWord, rhs.headWord)
      && goldCorefClusterID == rhs.goldCorefClusterID
      && corefClusterID == rhs.corefClusterID
      && mentionNum == rhs.mentionNum
      && sentNum == rhs.sentNum
      && utter == rhs.utter
      && paragraph == rhs.paragraph
      // flags
      && isSubject == rhs.isSubject
      && isDirectObject == rhs.isDirectObject
      && isIndirectObject == rhs.isIndirectObject
      && isPrepositionObject == rhs.isPrepositionObject
      && hasTwin == rhs.hasTwin
      && generic == rhs.generic
      && isSingleton == rhs.isSingleton
      // larger structures last
      && Objects.equals(originalSpan, rhs.originalSpan)
      && Objects.equals(sentenceWords, rhs.sentenceWords)
      && Objects.equals(basicDependency, rhs.basicDependency)
      && Objects.equals(enhancedDependency, rhs.enhancedDependency)
      && Objects.equals(contextParseTree, rhs.contextParseTree)
      && Objects.equals(dependents, rhs.dependents)
      && Objects.equals(preprocessedTerms, rhs.preprocessedTerms);
}
/**
 * Hash derived from the start and end indices only. Both are compared by
 * {@link #equals(Object)}, so equal mentions produce equal hashes.
 */
@Override
public int hashCode() {
  int offsets = startIndex;
  offsets += endIndex;
  return (37 * 49) + offsets;
}
}