package edu.stanford.nlp.pipeline;

import java.lang.reflect.Field;
import java.util.*;

/**
 * Maps human-language names and two-letter codes to {@link HumanLanguage}
 * values, and those values to the names of their properties files.
 *
 * <p>This is a static utility class and cannot be instantiated.
 */
public class LanguageInfo {

  /** Languages supported. */
  public enum HumanLanguage {ARABIC, CHINESE, ENGLISH, FRENCH, GERMAN, SPANISH}

  /* Properties file name for each supported language. */
  public static final String ARABIC_PROPERTIES = "StanfordCoreNLP-arabic.properties";
  public static final String CHINESE_PROPERTIES = "StanfordCoreNLP-chinese.properties";
  public static final String ENGLISH_PROPERTIES = "StanfordCoreNLP.properties";
  public static final String FRENCH_PROPERTIES = "StanfordCoreNLP-french.properties";
  public static final String GERMAN_PROPERTIES = "StanfordCoreNLP-german.properties";
  public static final String SPANISH_PROPERTIES = "StanfordCoreNLP-spanish.properties";

  /**
   * Maps each {@link HumanLanguage} to its properties file name.
   * The map is populated once in the static initializer below. It is a plain
   * mutable {@code HashMap}, so callers should treat it as read-only.
   */
  public static final HashMap<HumanLanguage,String> languageToPropertiesFile;

  static {
    languageToPropertiesFile = new HashMap<HumanLanguage,String>();
    languageToPropertiesFile.put(HumanLanguage.ARABIC, ARABIC_PROPERTIES);
    languageToPropertiesFile.put(HumanLanguage.CHINESE, CHINESE_PROPERTIES);
    languageToPropertiesFile.put(HumanLanguage.ENGLISH, ENGLISH_PROPERTIES);
    languageToPropertiesFile.put(HumanLanguage.FRENCH, FRENCH_PROPERTIES);
    languageToPropertiesFile.put(HumanLanguage.GERMAN, GERMAN_PROPERTIES);
    languageToPropertiesFile.put(HumanLanguage.SPANISH, SPANISH_PROPERTIES);
  }

  /** Not instantiable: this class only exposes static members. */
  private LanguageInfo() {
  }

  /**
   * Prints every public field of this class, found via reflection, as
   * {@code name<TAB>value}, one field per line.
   * This is useful for command line scripts.
   *
   * @param args Ignored.
   * @throws IllegalAccessException if a field cannot be read reflectively
   */
  public static void main(String[] args) throws IllegalAccessException {
    for (Field field : LanguageInfo.class.getFields()) {
      // All public fields of this class are static, so no instance is needed.
      System.out.println(field.getName() + "\t" + field.get(null));
    }
  }

  /**
   * Returns the properties file name for a language given by name or code.
   *
   * @param inputString a language name or code accepted by
   *                    {@link #getLanguageFromString(String)}; may be null
   * @return the properties file name, or null if the language is not recognized
   */
  public static String getLanguagePropertiesFile(String inputString) {
    return languageToPropertiesFile.get(getLanguageFromString(inputString));
  }

  /**
   * Converts a language name or two-letter code to a {@link HumanLanguage}.
   * Matching is case-insensitive. Recognized inputs are "arabic"/"ar",
   * "english"/"en", "chinese"/"zh", "french"/"fr", "german"/"de" and
   * "spanish"/"es".
   *
   * @param inputString the language name or code; may be null
   * @return the matching language, or null if the input is null or not recognized
   */
  public static HumanLanguage getLanguageFromString(String inputString) {
    if (inputString == null) {
      return null;
    }
    // Lower-case once, and with Locale.ROOT: under a Turkish default locale
    // "ENGLISH".toLowerCase() yields a dotless i and would never match.
    String normalized = inputString.toLowerCase(Locale.ROOT);
    if (normalized.equals("arabic") || normalized.equals("ar")) {
      return HumanLanguage.ARABIC;
    }
    if (normalized.equals("english") || normalized.equals("en")) {
      return HumanLanguage.ENGLISH;
    }
    if (normalized.equals("chinese") || normalized.equals("zh")) {
      return HumanLanguage.CHINESE;
    }
    if (normalized.equals("french") || normalized.equals("fr")) {
      return HumanLanguage.FRENCH;
    }
    if (normalized.equals("german") || normalized.equals("de")) {
      return HumanLanguage.GERMAN;
    }
    if (normalized.equals("spanish") || normalized.equals("es")) {
      return HumanLanguage.SPANISH;
    }
    return null;
  }

  /**
   * Checks whether a language is a segmenter language.
   * Arabic and Chinese are the only languages treated as such here.
   *
   * @param language the language to check; may be null
   * @return true for ARABIC or CHINESE, false otherwise (including null)
   */
  public static boolean isSegmenterLanguage(HumanLanguage language) {
    return language == HumanLanguage.ARABIC || language == HumanLanguage.CHINESE;
  }

  /**
   * Checks whether the language given by name or code is a segmenter language.
   *
   * @param inputString a language name or code accepted by
   *                    {@link #getLanguageFromString(String)}; may be null
   * @return true if the input resolves to ARABIC or CHINESE, false otherwise
   */
  public static boolean isSegmenterLanguage(String inputString) {
    return isSegmenterLanguage(getLanguageFromString(inputString));
  }

}