package arkref.data;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import arkref.analysis.ARKref;
import arkref.parsestuff.U;
/**
* For identifying the genders of given names.
* Uses US Census data from
*
* http://www.census.gov/genealogy/names/dist.female.first
* http://www.census.gov/genealogy/names/dist.male.first
*
* @author Michael Heilman
*
*/
public class FirstNames {
    /** Maps a lower-cased given name to the gender chosen for it at load time. */
    private Map<String, NameGender> genderMap;

    /** Lazily created shared instance; see {@link #getInstance()}. */
    private static FirstNames instance;

    //I did not use the same gender enumeration as in
    //the types class to avoid having that extra
    //dependency. I want to be able to take this
    //class and use it for other stuff if I want. --MJH
    private static enum NameGender {
        Male("Mal"), Female("Fem"), Unknown("Unk");

        /** Three-letter label returned by {@link #toString()}. */
        private final String label;

        NameGender(String label) {
            this.label = label;
        }

        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Loads the male and female name lists and builds the name-to-gender map.
     *
     * The file locations are read from the ARKref properties
     * ("maleFirstNamesFile" and "femaleFirstNamesFile"), falling back to
     * config/dist.male.first and config/dist.female.first.
     */
    private FirstNames() {
        genderMap = new HashMap<String, NameGender>();

        String maleNamesPath = ARKref.getProperties().getProperty("maleFirstNamesFile", "config/dist.male.first");
        String femaleNamesPath = ARKref.getProperties().getProperty("femaleFirstNamesFile", "config/dist.female.first");

        //load U.S. census data
        //The frequencies are only kept while building the map, to
        //make decisions about names that appear in both lists.
        Map<String, Double> maleFrequencies = loadNameFrequencies(maleNamesPath);
        Map<String, Double> femaleFrequencies = loadNameFrequencies(femaleNamesPath);

        //add male names
        for (String maleName : maleFrequencies.keySet()) {
            genderMap.put(maleName, NameGender.Male);
        }

        //add female names; a name found in both lists becomes female only
        //when its female frequency is strictly higher (ties stay male)
        for (Map.Entry<String, Double> entry : femaleFrequencies.entrySet()) {
            String name = entry.getKey();
            Double maleFreq = maleFrequencies.get(name);
            if (maleFreq == null || maleFreq < entry.getValue()) {
                genderMap.put(name, NameGender.Female);
            }
        }
    }

    /**
     * Reads a whitespace-separated name list: the first field of each line is
     * the name (stored lower-cased), the second is parsed as its frequency.
     *
     * Blank lines and lines without a second field are skipped, and a line
     * whose second field is not a number is reported on standard error and
     * skipped. An I/O failure is reported on standard error and whatever was
     * read up to that point is returned, so a missing file yields an empty map.
     *
     * @param path location of the name list file
     * @return map from lower-cased name to frequency; never null
     */
    private Map<String, Double> loadNameFrequencies(String path) {
        Map<String, Double> res = new HashMap<String, Double>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
            String buf;
            while ((buf = br.readLine()) != null) {
                //trim first so that leading whitespace does not produce an empty name
                String[] parts = buf.trim().split("\\s+");
                if (parts.length < 2) {
                    continue;
                }
                try {
                    //Locale.ROOT keeps the keys identical regardless of the default locale
                    res.put(parts[0].toLowerCase(Locale.ROOT), Double.valueOf(parts[1]));
                } catch (NumberFormatException e) {
                    System.err.println("FirstNames: skipping malformed line in " + path + ": " + buf);
                }
            }
        } catch (IOException e) {
            System.err.println("FirstNames: could not read name list " + path);
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    //nothing useful can be done if closing a read-only stream fails
                }
            }
        }
        return res;
    }

    /**
     * Returns the shared instance, creating it (and loading the name lists)
     * on the first call. No synchronization is performed here.
     *
     * @return the shared FirstNames instance
     */
    public static FirstNames getInstance() {
        if (instance == null) {
            instance = new FirstNames();
        }
        return instance;
    }

    /**
     * Collects every known name that is mapped to the given gender.
     *
     * @param wanted gender to select
     * @return a new set of lower-cased names
     */
    private Set<String> namesWithGender(NameGender wanted) {
        Set<String> res = new HashSet<String>();
        for (Map.Entry<String, NameGender> entry : genderMap.entrySet()) {
            if (entry.getValue() == wanted) {
                res.add(entry.getKey());
            }
        }
        return res;
    }

    /**
     * @return a new set holding every lower-cased name classified as male
     */
    public Set<String> getMaleNames() {
        return namesWithGender(NameGender.Male);
    }

    /**
     * @return a new set holding every lower-cased name classified as female
     */
    public Set<String> getFemaleNames() {
        return namesWithGender(NameGender.Female);
    }

    /**
     * @return a new set holding every known lower-cased name
     */
    public Set<String> getAllFirstNames() {
        return new HashSet<String>(genderMap.keySet());
    }

    /**
     * Looks up the gender recorded for a given name, ignoring case.
     *
     * @param name the given name to look up; may be null
     * @return the recorded gender, or Unknown when the name is null or not listed
     */
    public NameGender getGender(String name) {
        if (name == null) {
            return NameGender.Unknown;
        }
        NameGender gender = genderMap.get(name.toLowerCase(Locale.ROOT));
        return gender == null ? NameGender.Unknown : gender;
    }

    /**
     * Reads names from standard input, one per line, and prints the gender
     * string of each.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        //pre-load
        FirstNames names = FirstNames.getInstance();

        System.err.println("Type names on standard input...");
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String buf;
        while ((buf = br.readLine()) != null) {
            U.pl(names.getGenderString(buf.trim()));
        }
    }

    /**
     * Returns a short label for the gender of a given name.
     *
     * @param name the given name to look up; may be null
     * @return "Mal" for male names, "Fem" for female names, and the empty
     *         string when the gender is unknown
     */
    public String getGenderString(String name) {
        NameGender gender = getGender(name);
        if (gender == NameGender.Male) {
            return "Mal";
        }
        if (gender == NameGender.Female) {
            return "Fem";
        }
        return "";
    }
}