package edu.stanford.nlp.quoteattribution.Sieves.MSSieves; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator; import edu.stanford.nlp.quoteattribution.Person; import edu.stanford.nlp.stats.ClassicCounter; import edu.stanford.nlp.stats.Counter; import edu.stanford.nlp.stats.Counters; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Pair; import java.util.*; /** * @author Michael Fang * @author Grace Muzny */ public class BaselineTopSpeakerSieve extends MSSieve { private Map<String, Person.Gender> genderList; private Set<String> familyRelations; public static final int BACKWARD_WINDOW = 2000; public static final int BACKWARD_WINDOW_BIG = 4000; public static final int FORWARD_WINDOW = 500; public static final int FORWARD_WINDOW_BIG = 2500; public static final double FORWARD_WEIGHT = 0.34; public static final double BACKWARD_WEIGHT = 1.0; public BaselineTopSpeakerSieve(Annotation doc, Map<String, List<Person>> characterMap, Map<Integer, String> pronounCorefMap, Set<String> animacySet, Map<String, Person.Gender> genderList, Set<String> familyRelations) { super(doc, characterMap, pronounCorefMap, animacySet); this.genderList = genderList; this.familyRelations = familyRelations; } public void doMentionToSpeaker(Annotation doc) { topSpeakerInRange(doc); } public MentionData makeMentionData(CoreMap q) { if(q.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) { return new MentionData(q.get(QuoteAttributionAnnotator.MentionBeginAnnotation.class), q.get(QuoteAttributionAnnotator.MentionEndAnnotation.class), q.get(QuoteAttributionAnnotator.MentionAnnotation.class), q.get(QuoteAttributionAnnotator.MentionTypeAnnotation.class)); } return new MentionData(-1, -1, null, null); } public void topSpeakerInRange(Annotation doc) { List<CoreLabel> toks = doc.get(CoreAnnotations.TokensAnnotation.class); List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class); for (int quote_idx = 0; quote_idx < quotes.size(); quote_idx++) { CoreMap quote = quotes.get(quote_idx); if(quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) == null) { Pair<Integer, Integer> quoteRun = new Pair<>(quote.get(CoreAnnotations.TokenBeginAnnotation.class), quote.get(CoreAnnotations.TokenEndAnnotation.class)); List<MentionData> closestMentionsBackward = findClosestMentionsInSpanBackward( new Pair<>(Math.max(0, quoteRun.first - BACKWARD_WINDOW), quoteRun.first - 1)); List<MentionData> closestMentions = findClosestMentionsInSpanForward(new Pair<>(quoteRun.second + 1, Math.min(quoteRun.second + FORWARD_WINDOW, toks.size() - 1))); closestMentions.addAll(closestMentionsBackward); Person.Gender gender = getGender(makeMentionData(quote)); List<String> topSpeakers = Counters.toSortedList(getTopSpeakers(closestMentions, closestMentionsBackward, gender, quote, false)); //if none found, try again with bigger window if(topSpeakers.size() == 0) { closestMentionsBackward = findClosestMentionsInSpanBackward(new Pair<>(Math.max(0, quoteRun.first - BACKWARD_WINDOW_BIG), quoteRun.first - 1)); closestMentions = findClosestMentionsInSpanForward(new Pair<>(quoteRun.second + 1, Math.min(quoteRun.second + FORWARD_WINDOW_BIG, toks.size() - 1))); topSpeakers = Counters.toSortedList(getTopSpeakers(closestMentions, closestMentionsBackward, gender, quote, true)); } if(topSpeakers.size() == 0) { System.err.println("Watch out, that's an empty top speakers list!"); continue; } topSpeakers = removeQuoteNames(topSpeakers, quote); String topSpeaker = topSpeakers.get(0); Pair<String, String> nextPrediction = getConversationalNextPrediction(quotes, quote_idx, gender); boolean set = updatePredictions(quote, nextPrediction); if(set) { continue; } Pair<String, String> prevPrediction = getConversationalPreviousPrediction(quotes,quote_idx, gender); set = updatePredictions(quote, prevPrediction); if(set) { continue; } Pair<String, String> famPrediction = getFamilyAnimateVocative(quotes, quote_idx, gender, topSpeakers); set = updatePredictions(quote, famPrediction); if(set) { continue; } updatePredictions(quote, new Pair<>(topSpeaker, "")); } } } public List<String> removeQuoteNames(List<String> topSpeakers, CoreMap quote) { // if the top speakers name is in the quote, // move to the next option and remove it String topSpeaker = topSpeakers.get(0); Set<Person> namesInParagraphQuotes = getNamesInParagraph(quote); if(namesInParagraphQuotes.contains(characterMap.get(topSpeaker).get(0)) && topSpeakers.size() > 1) { topSpeakers.remove(0); } return topSpeakers; } public Person.Gender getGender(MentionData mention) { Person.Gender gender = Person.Gender.UNK; if (mention.type != null && mention.type.equals("pronoun")) { if (mention.text.equalsIgnoreCase("he")) { gender = Person.Gender.MALE; } else if (mention.text.equalsIgnoreCase("she")){ gender = Person.Gender.FEMALE; } } else if (mention.type != null && mention.type.equals("animate noun")) { String mentionText = mention.text.toLowerCase(); if (genderList.get(mentionText) != null) { gender = genderList.get(mentionText); } } else if(mention.type != null && mention.type.equals("name")) { gender = characterMap.get(mention.text).get(0).gender; } return gender; } public Counter<String> getTopSpeakers(List<MentionData> closestMentions, List<MentionData> closestMentionsBackward, Person.Gender gender, CoreMap quote, boolean overrideGender) { Counter<String> topSpeakerInRange = new ClassicCounter<>(); Counter<String> topSpeakerInRangeIgnoreGender = new ClassicCounter<>(); Set<MentionData> backwardsMentions = new HashSet<>(); backwardsMentions.addAll(closestMentionsBackward); for(MentionData mention : closestMentions) { double weight = backwardsMentions.contains(mention) ? BACKWARD_WEIGHT : FORWARD_WEIGHT; if(mention.type.equals(NAME)) { Person p = characterMap.get(mention.text).get(0); if ((gender == Person.Gender.MALE && p.gender == Person.Gender.MALE) || (gender == Person.Gender.FEMALE && p.gender == Person.Gender.FEMALE) || (gender == Person.Gender.UNK)) { topSpeakerInRange.incrementCount(p.name, weight); } topSpeakerInRangeIgnoreGender.incrementCount(p.name, weight); if(closestMentions.size() == 128 && closestMentionsBackward.size() == 94) System.out.println(p.name + " " + weight + " name"); } else if (mention.type.equals(PRONOUN)) { int charBeginKey = doc.get(CoreAnnotations.TokensAnnotation.class).get(mention.begin).beginPosition(); Person p = doCoreference(charBeginKey, quote); if (p != null) { if ((gender == Person.Gender.MALE && p.gender == Person.Gender.MALE) || (gender == Person.Gender.FEMALE && p.gender == Person.Gender.FEMALE) || (gender == Person.Gender.UNK)) { topSpeakerInRange.incrementCount(p.name, weight); } topSpeakerInRangeIgnoreGender.incrementCount(p.name, weight); if(closestMentions.size() == 128 && closestMentionsBackward.size() == 94) System.out.println(p.name + " " + weight + " pronoun"); } } } if (topSpeakerInRange.size() > 0) { return topSpeakerInRange; } else if (gender != Person.Gender.UNK && !overrideGender) { return topSpeakerInRange; } return topSpeakerInRangeIgnoreGender; } public boolean updatePredictions(CoreMap quote, Pair<String, String> speakerAndMethod) { if(speakerAndMethod.first != null && speakerAndMethod.second != null) { quote.set(QuoteAttributionAnnotator.SpeakerAnnotation.class, characterMap.get(speakerAndMethod.first).get(0).name); quote.set(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class, "Baseline Top" + speakerAndMethod.second); return true; } return false; } public Pair<String, String> getFamilyAnimateVocative(List<CoreMap> quotes, int quote_index, Person.Gender gender, List<String> topSpeakers) { MentionData mention = makeMentionData(quotes.get(quote_index)); if(mention.text != null) { if(mention.type.equals("animate noun") && familyRelations.contains(mention.text.toLowerCase()) && gender != Person.Gender.UNK) { int quoteContainingMention = getQuoteContainingRange(quotes, new Pair<>(mention.begin, mention.end)); if(quoteContainingMention >= 0) { String relatedName = quotes.get(quoteContainingMention).get(QuoteAttributionAnnotator.SpeakerAnnotation.class); if(relatedName != null) { for (String speaker : topSpeakers) { String[] speakerNames = speaker.split("_"); if (relatedName.endsWith(speakerNames[speakerNames.length - 1])) { return new Pair<>(speaker, "family animate"); } } } } } } return new Pair<>(null, null); } public Pair<String, String> getConversationalPreviousPrediction(List<CoreMap> quotes, int quoteIndex, Person.Gender gender) { String topSpeaker = null; String modifier = null; // if the n - 2 paragraph quotes are labelled with a speaker and // that speakers gender does not disagree, label with that speaker List<Integer> quotesInPrevPrev = new ArrayList<>(); CoreMap quote = quotes.get(quoteIndex); int quoteParagraph = getQuoteParagraph(quote); for(int j = quoteIndex - 1; j >= 0; j--) { if(getQuoteParagraph(quotes.get(j)) == quoteParagraph - 2) { quotesInPrevPrev.add(j); } } for (int prevPrev : quotesInPrevPrev) { CoreMap prevprevQuote = quotes.get(prevPrev); String speakerName = prevprevQuote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class); if(speakerName != null && (gender == Person.Gender.UNK) || getGender(makeMentionData(prevprevQuote)) == gender) { topSpeaker = speakerName; modifier = " conversation - prev"; } } return new Pair(topSpeaker, modifier); } public Pair<String, String> getConversationalNextPrediction(List<CoreMap> quotes, int quoteIndex, Person.Gender gender) { String topSpeaker = null; String modifier = null; // if the n - 2 paragraph quotes are labelled with a speaker and // that speakers gender does not disagree, label with that speaker List<Integer> quotesInNextNext = new ArrayList<>(); CoreMap quote = quotes.get(quoteIndex); int quoteParagraph = getQuoteParagraph(quote); for(int j = quoteIndex + 1; j < quotes.size(); j++) { if(getQuoteParagraph(quotes.get(j)) == quoteParagraph + 2) { quotesInNextNext.add(j); } } for (int nextNext : quotesInNextNext) { CoreMap nextNextQuote = quotes.get(nextNext); String speakerName = nextNextQuote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class); MentionData md = makeMentionData(quotes.get(nextNext)); if(speakerName != null && (gender == Person.Gender.UNK) || getGender(md) == gender) { topSpeaker = speakerName; modifier = " conversation - next"; } } return new Pair<>(topSpeaker, modifier); } public int getQuoteContainingRange(List<CoreMap> quotes, Pair<Integer, Integer> range) { for (int i = 0; i < quotes.size(); i++) { if(quotes.get(i).get(CoreAnnotations.TokenBeginAnnotation.class) <= range.first && quotes.get(i).get(CoreAnnotations.TokenEndAnnotation.class) >= range.second) { return i; } } return -1; } }