//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.coreference.impl.enhancers;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Mention;
import uk.gov.dstl.baleen.resources.SharedGenderMultiplicityResource;
import uk.gov.dstl.baleen.resources.data.Multiplicity;
import uk.gov.dstl.baleen.types.common.Organisation;
import uk.gov.dstl.baleen.types.language.WordToken;
/**
* Add multiplicity information to mentions.
*/
public class MultiplicityEnhancer implements MentionEnhancer {
private static final Map<String, Multiplicity> PRONOUN_MAP = new HashMap<>();
private final SharedGenderMultiplicityResource multiplicityResource;
static {
// See Person
Arrays.asList("i", "he", "her", "herself", "hers", "her", "him", "himself", "his", "it", "its", "itself", "me",
"myself", "mine", "my", "one", "oneself", "one's", "she", "yourself").stream()
.forEach(s -> PRONOUN_MAP.put(s, Multiplicity.SINGULAR));
Arrays.asList("ours", "our", "ourself", "ourselves", "their", "theirs", "them", "themself", "themselves",
"they", "us", "we", "yourself", "yourselves").stream()
.forEach(s -> PRONOUN_MAP.put(s, Multiplicity.PLURAL));
}
/**
* Constructor for MultiplicityEnhancer
*/
public MultiplicityEnhancer(SharedGenderMultiplicityResource multiplicityResource) {
this.multiplicityResource = multiplicityResource;
}
@Override
public void enhance(Mention mention) {
switch (mention.getType()) {
case PRONOUN:
mention.setMultiplicity(PRONOUN_MAP.getOrDefault(mention.getText().toLowerCase(), Multiplicity.UNKNOWN));
return;
case ENTITY:
mention.setMultiplicity(getEntityMultiplicity(mention));
break;
case NP:
mention.setMultiplicity(getNounPhraseMultiplicity(mention));
break;
default:
return;
}
// TODO: Should we always check our resource and then override the multiplicity?
if (mention.getMultiplicity() == Multiplicity.UNKNOWN) {
final Multiplicity assignedMultiplicity = multiplicityResource.lookupMultiplicity(mention.getText());
mention.setMultiplicity(assignedMultiplicity);
}
}
private Multiplicity getEntityMultiplicity(Mention mention){
// Assumed singular, unless organisation
if (mention.getAnnotation() instanceof Organisation) {
return Multiplicity.UNKNOWN;
} else {
return Multiplicity.SINGULAR;
}
}
private Multiplicity getNounPhraseMultiplicity(Mention mention){
Multiplicity m = Multiplicity.UNKNOWN;
final WordToken head = mention.getHeadWordToken();
if (head != null) {
if ("NNS".equalsIgnoreCase(head.getPartOfSpeech())
|| "NPS".equalsIgnoreCase(mention.getHeadWordToken().getPartOfSpeech())) {
m = Multiplicity.PLURAL;
} else {
m = Multiplicity.SINGULAR;
}
}
return m;
}
}