//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.coreference.impl.sieves;
import java.util.List;
import org.apache.uima.jcas.JCas;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Cluster;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Mention;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.MentionType;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Person;
/**
* Joins pronouns which are in the same sentence.
* <p>
* This is not part of the original paper, and it might have been taken care of during their
* implementation. However it seems sensible.
* <p>
* There are probably areas of English where this does not work well, e.g. "Jim saw James and he
* was going to town" (James = he?) vs "He saw James and he was going to town" (he = ?).
*
*/
public class InSentencePronounSieve extends AbstractCoreferenceSieve {

  // TODO: Finer-grained third person handling is not implemented, as these cases seem ambiguous:
  // - Third singular would be ok within the sets {he, his} or {him, himself}.
  // - Probably nothing can be done with she, her, hers, herself (there is no equivalent of
  //   "his", so her = him/his), which could refer to different people in the sentence.
  // - Third plural: e.g. {they, their, theirs} or {them, themselves}.
  // - Third neuter: e.g. {it, its} or {itself}.

  /**
   * Constructor for InSentencePronounSieve.
   *
   * <p>All three arguments are handed straight to the superclass constructor.
   *
   * @param jCas the JCas passed on to the parent sieve
   * @param clusters the clusters passed on to the parent sieve
   * @param mentions the mentions passed on to the parent sieve
   */
  public InSentencePronounSieve(JCas jCas, List<Cluster> clusters, List<Mention> mentions) {
    super(jCas, clusters, mentions);
  }

  /**
   * Compares every pair of pronoun mentions and calls {@code addToCluster} for each pair that
   * shares a sentence index and either agrees grammatically (see {@link #agree(Mention, Mention)})
   * or has the same text ignoring case.
   */
  @Override
  public void sieve() {
    final List<Mention> pronouns = getMentions(MentionType.PRONOUN);

    for (int outer = 0; outer < pronouns.size(); outer++) {
      final Mention a = pronouns.get(outer);
      final String aText = a.getText();

      // Only look forward in the list so that each unordered pair is visited once.
      for (int inner = outer + 1; inner < pronouns.size(); inner++) {
        final Mention b = pronouns.get(inner);
        final String bText = b.getText();

        final boolean sameSentence = a.getSentenceIndex() == b.getSentenceIndex();

        // Grammatical agreement is tried first; identical text is the fallback.
        if (sameSentence && (agree(a, b) || aText.equalsIgnoreCase(bText))) {
          addToCluster(a, b);
        }
      }
    }
  }

  /**
   * Decides whether two pronouns agree closely enough to be joined.
   *
   * <ul>
   *   <li>First person (e.g. i, me, mine, my, myself / we, us, our, ours, ourselves): both being
   *       first person is sufficient.
   *   <li>Second person (yourself, yourselves, you, your, yours): multiplicity must also match,
   *       so plural and singular are not mixed.
   *   <li>Third person: multiplicity and gender must both match.
   * </ul>
   *
   * @param a the first pronoun mention
   * @param b the second pronoun mention
   * @return true if one of the rules above holds for the pair
   */
  private boolean agree(Mention a, Mention b) {
    return bothOfPerson(a, b, Person.FIRST)
        || (bothOfPerson(a, b, Person.SECOND) && sameMultiplicity(a, b))
        || (bothOfPerson(a, b, Person.THIRD) && sameMultiplicity(a, b) && sameGender(a, b));
  }

  /** True when both mentions report exactly the given grammatical person. */
  private boolean bothOfPerson(Mention a, Mention b, Person person) {
    return a.getPerson() == person && b.getPerson() == person;
  }

  /** True when both mentions report the same multiplicity. */
  private boolean sameMultiplicity(Mention a, Mention b) {
    return a.getMultiplicity() == b.getMultiplicity();
  }

  /** True when both mentions report the same gender. */
  private boolean sameGender(Mention a, Mention b) {
    return a.getGender() == b.getGender();
  }
}