//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.coreference.impl.enhancers;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.Mention;
import uk.gov.dstl.baleen.annotators.coreference.impl.data.MentionType;
import uk.gov.dstl.baleen.resources.SharedGenderMultiplicityResource;
import uk.gov.dstl.baleen.resources.data.Gender;
import uk.gov.dstl.baleen.types.Base;
import uk.gov.dstl.baleen.types.common.Nationality;
import uk.gov.dstl.baleen.types.common.Person;
/**
 * Adds gender information to a mention.
 *
 * <p>Pronouns are resolved from a fixed pronoun table. Person entities are resolved from their
 * title where possible, falling back to the shared gender resource. All table lookups are
 * case-insensitive and use {@link Locale#ROOT} so that results do not depend on the JVM's default
 * locale.
 */
public class GenderEnhancer implements MentionEnhancer {

  /** Splits on runs of horizontal whitespace; compiled once rather than on every call. */
  private static final Pattern HORIZONTAL_WHITESPACE = Pattern.compile("\\h+");

  /** Lower-case pronoun (and pronoun-like adverb) to gender. Read-only after class initialisation. */
  private static final Map<String, Gender> PRONOUN_MAP;

  /** Lower-case title (without trailing full stops) to gender. Read-only after class initialisation. */
  private static final Map<String, Gender> TITLE_MAP;

  static {
    final Map<String, Gender> pronouns = new HashMap<>();
    register(pronouns, Gender.M, "he", "him", "his", "himself");
    register(pronouns, Gender.F, "she", "her", "hers", "herself");
    register(pronouns, Gender.N, "it", "its", "itself", "when", "where", "there", "here");
    PRONOUN_MAP = Collections.unmodifiableMap(pronouns);

    final Map<String, Gender> titles = new HashMap<>();
    // "abbott" is a misspelling retained for backward compatibility; "abbot" is the correct form.
    register(
        titles,
        Gender.M,
        "mr", "master", "sir", "lord", "baron", "count", "duke", "prince", "king", "father", "fr",
        "brother", "abbot", "abbott", "his royal highness", "his majesty", "emperor", "tsar");
    // "tsarista" is a misspelling retained for backward compatibility; "tsarina" and "tsaritsa"
    // are the correct forms.
    register(
        titles,
        Gender.F,
        "mrs", "miss", "ms", "dame", "lady", "baroness", "countess", "duchess", "princess",
        "queen", "mother", "sister", "abbess", "her royal highness", "her majesty", "empress",
        "tsarina", "tsaritsa", "tsarista");
    TITLE_MAP = Collections.unmodifiableMap(titles);
  }

  private final SharedGenderMultiplicityResource genderResource;

  /**
   * Constructor for GenderEnhancer
   *
   * @param genderResource resource used to look up a gender from mention text when no pronoun or
   *     title rule applies
   */
  public GenderEnhancer(SharedGenderMultiplicityResource genderResource) {
    this.genderResource = genderResource;
  }

  /**
   * Sets the gender on the supplied mention.
   *
   * <ul>
   *   <li>Pronoun mentions: looked up in the pronoun table, {@link Gender#UNKNOWN} if absent.
   *   <li>Entity mentions annotated as {@link Person}: gender of the title if recognised,
   *       otherwise the gender resource's answer for the mention text.
   *   <li>Entity mentions annotated as {@link Nationality}: {@link Gender#UNKNOWN}.
   *   <li>Any other entity mention: {@link Gender#N}.
   *   <li>Any other mention type: the gender resource's answer for the mention text.
   * </ul>
   *
   * @param mention the mention to update
   */
  @Override
  public void enhance(Mention mention) {
    if (mention.getType() == MentionType.PRONOUN) {
      final String pronoun = mention.getText().toLowerCase(Locale.ROOT);
      mention.setGender(PRONOUN_MAP.getOrDefault(pronoun, Gender.UNKNOWN));
    } else if (mention.getType() == MentionType.ENTITY) {
      final Base annotation = mention.getAnnotation();
      if (annotation instanceof Person) {
        final Person p = (Person) annotation;
        Gender gender = getGenderFromTitle(p.getTitle());
        if (gender == Gender.UNKNOWN) {
          // No usable title, so fall back to looking up the mention text itself.
          gender = genderResource.lookupGender(mention.getText());
        }
        mention.setGender(gender);
      } else if (annotation instanceof Nationality) {
        mention.setGender(Gender.UNKNOWN);
      } else {
        mention.setGender(Gender.N);
      }
    } else {
      mention.setGender(genderResource.lookupGender(mention.getText()));
    }
  }

  /**
   * Determine the gender of a title (e.g. Mr would return Gender.M), or return Gender.UNKNOWN if
   * not known.
   *
   * <p>The whole title is tried first (so multi-word titles such as "Her Majesty" match), then
   * each whitespace-separated part in order, returning the first match. Matching ignores case,
   * surrounding whitespace, repeated internal whitespace and trailing full stops (so "Mr." is
   * treated as "Mr").
   *
   * @param title the title to examine; may be {@code null}
   * @return the gender implied by the title, or {@link Gender#UNKNOWN} if the title is
   *     {@code null} or not recognised
   */
  public static Gender getGenderFromTitle(String title) {
    if (title == null) {
      return Gender.UNKNOWN;
    }

    final String[] parts = HORIZONTAL_WHITESPACE.split(title.trim());

    // Re-join with single spaces so irregular spacing does not defeat multi-word titles.
    final Gender whole = TITLE_MAP.get(normalise(String.join(" ", parts)));
    if (whole != null) {
      return whole;
    }

    for (final String part : parts) {
      final Gender gender = TITLE_MAP.get(normalise(part));
      if (gender != null) {
        return gender;
      }
    }
    return Gender.UNKNOWN;
  }

  /** Adds every key to the map with the same gender value. */
  private static void register(Map<String, Gender> map, Gender gender, String... keys) {
    for (final String key : keys) {
      map.put(key, gender);
    }
  }

  /** Trims, lower-cases (locale-independently) and removes trailing full stops. */
  private static String normalise(String text) {
    final String lowered = text.trim().toLowerCase(Locale.ROOT);
    int end = lowered.length();
    while (end > 0 && lowered.charAt(end - 1) == '.') {
      end--;
    }
    return lowered.substring(0, end).trim();
  }
}