package de.unigoettingen.sub.commons.ocr.util.abbyy;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import de.uni_goettingen.sub.commons.ocr.api.OcrFormat;
import de.uni_goettingen.sub.commons.ocr.api.OcrPriority;
import de.uni_goettingen.sub.commons.ocr.api.OcrTextType;
import de.uni_goettingen.sub.commons.ocr.api.OcrQuality;
/**
 * Static lookup tables that translate values of the generic OCR API
 * ({@link Locale}, {@link OcrTextType}, {@link OcrFormat}, {@link OcrPriority},
 * {@link OcrQuality}) into plain strings, presumably the identifiers expected
 * by the Abbyy engine (TODO confirm against the Abbyy ticket specification).
 *
 * <p>All lookups return {@code null} when no mapping exists. The tables are
 * filled once in the static initializer and are never modified afterwards.
 */
public class ToAbbyyMapper {

    /** Language-only locales (ISO 639 two-letter codes) mapped to language names. */
    private static final Map<Locale, String> LANGUAGE_MAP = new HashMap<Locale, String>();
    /** Text types mapped to their string names. */
    private static final Map<OcrTextType, String> TEXTTYPE_MAP = new HashMap<OcrTextType, String>();
    /** Output formats mapped to their string names. */
    private static final Map<OcrFormat, String> FORMAT_MAP = new HashMap<OcrFormat, String>();
    /** Job priorities mapped to their string names. */
    private static final Map<OcrPriority, String> PRIORITY_MAP = new HashMap<OcrPriority, String>();
    /** Recognition quality levels mapped to their string names. */
    private static final Map<OcrQuality, String> QUALITY_MAP = new HashMap<OcrQuality, String>();

    static {
        // TODO find a (stable) link
        // See http://ftp.ics.uci.edu/pub/ietf/http/related/iso639.txt
        //
        // Technical contents of ISO 639:1988 (E/F)
        // "Code for the representation of names of languages".
        // The Registration Authority for ISO 639 is Infoterm, Osterreichisches
        // Normungsinstitut (ON), Postfach 130, A-1021 Vienna, Austria.
        //
        // Only Abbyy recognition languages are registered. ISO 639 codes listed
        // under "Disabled" were commented out in the original table and are kept
        // here for reference only.
        LANGUAGE_MAP.put(new Locale("ab"), "Abkhazian");
        LANGUAGE_MAP.put(new Locale("az"), "Azerbaijani");
        LANGUAGE_MAP.put(new Locale("af"), "Afrikaans");
        LANGUAGE_MAP.put(new Locale("am"), "Amharic");
        LANGUAGE_MAP.put(new Locale("ay"), "Aymara");
        // Disabled: ar (Arabic), as (Assamese), aa (Afar)

        LANGUAGE_MAP.put(new Locale("ba"), "Bashkir");
        LANGUAGE_MAP.put(new Locale("be"), "Byelorussian");
        LANGUAGE_MAP.put(new Locale("bg"), "Bulgarian");
        LANGUAGE_MAP.put(new Locale("br"), "Breton");
        // Disabled: bo (Tibetan), bh (Bihari), bi (Bislama), bn (Bengali)

        LANGUAGE_MAP.put(new Locale("ca"), "Catalan");
        LANGUAGE_MAP.put(new Locale("co"), "Corsican");
        LANGUAGE_MAP.put(new Locale("cs"), "Czech");
        LANGUAGE_MAP.put(new Locale("cy"), "Welsh");

        LANGUAGE_MAP.put(new Locale("da"), "Danish");
        LANGUAGE_MAP.put(Locale.GERMAN, "German");
        // Disabled: dz (Bhutani)

        LANGUAGE_MAP.put(new Locale("el"), "Greek");
        LANGUAGE_MAP.put(Locale.ENGLISH, "English");
        LANGUAGE_MAP.put(new Locale("es"), "Spanish");
        LANGUAGE_MAP.put(new Locale("et"), "Estonian");
        LANGUAGE_MAP.put(new Locale("eu"), "Basque");
        // Disabled: eo (Esperanto)

        LANGUAGE_MAP.put(new Locale("fi"), "Finnish");
        LANGUAGE_MAP.put(new Locale("fo"), "Faroese");
        LANGUAGE_MAP.put(new Locale("fr"), "French");
        LANGUAGE_MAP.put(new Locale("fy"), "Frisian");
        // Disabled: fa (Persian), fj (Fiji)

        LANGUAGE_MAP.put(new Locale("ga"), "Irish");
        LANGUAGE_MAP.put(new Locale("gd"), "Scots Gaelic");
        LANGUAGE_MAP.put(new Locale("gl"), "Galician");
        LANGUAGE_MAP.put(new Locale("gn"), "Guarani");
        LANGUAGE_MAP.put(new Locale("gu"), "Gujarati");

        LANGUAGE_MAP.put(new Locale("ha"), "Hausa");
        LANGUAGE_MAP.put(new Locale("he"), "Hebrew");
        LANGUAGE_MAP.put(new Locale("hr"), "Croatian");
        LANGUAGE_MAP.put(new Locale("hy"), "Armenian");
        LANGUAGE_MAP.put(new Locale("hu"), "Hungarian");
        // Disabled: hi (Hindi)

        LANGUAGE_MAP.put(new Locale("id"), "Indonesian");
        LANGUAGE_MAP.put(new Locale("it"), "Italian");
        // Disabled: ie (Interlingue), ik (Inupiak), is (Icelandic),
        //           ia (Interlingua), iu (Inuktitut)

        LANGUAGE_MAP.put(new Locale("ja"), "Japanese");
        // Disabled: jw (Javanese)

        LANGUAGE_MAP.put(new Locale("ko"), "Korean");
        LANGUAGE_MAP.put(new Locale("ku"), "Kurdish");
        LANGUAGE_MAP.put(new Locale("ky"), "Kirghiz");
        LANGUAGE_MAP.put(new Locale("kk"), "Kazakh");
        // Disabled: ka (Georgian), kn (Kannada), kl (Greenlandic),
        //           km (Cambodian), ks (Kashmiri)

        LANGUAGE_MAP.put(new Locale("la"), "Latin");
        LANGUAGE_MAP.put(new Locale("lt"), "Lithuanian");
        LANGUAGE_MAP.put(new Locale("lv"), "Latvian");
        // Disabled: ln (Lingala), lo (Laothian)

        LANGUAGE_MAP.put(new Locale("mg"), "Malagasy");
        LANGUAGE_MAP.put(new Locale("mi"), "Maori");
        LANGUAGE_MAP.put(new Locale("mk"), "Macedonian");
        LANGUAGE_MAP.put(new Locale("ms"), "Malay");
        LANGUAGE_MAP.put(new Locale("mn"), "Mongolian");
        LANGUAGE_MAP.put(new Locale("mo"), "Moldavian");
        LANGUAGE_MAP.put(new Locale("mt"), "Maltese");
        // Disabled: my (Burmese), ml (Malayalam), mr (Marathi)

        LANGUAGE_MAP.put(new Locale("nl"), "Dutch");
        LANGUAGE_MAP.put(new Locale("no"), "Norwegian");
        // Disabled: na (Nauru), ne (Nepali)

        LANGUAGE_MAP.put(new Locale("oc"), "Occitan");
        // Disabled: om (Oromo), or (Oriya)

        LANGUAGE_MAP.put(new Locale("pl"), "Polish");
        LANGUAGE_MAP.put(new Locale("pt"), "Portuguese");
        // Disabled: pa (Punjabi), ps (Pashto)

        LANGUAGE_MAP.put(new Locale("qu"), "Quechua");

        LANGUAGE_MAP.put(new Locale("rm"), "Rhaeto-Romance");
        LANGUAGE_MAP.put(new Locale("ru"), "Russian");
        LANGUAGE_MAP.put(new Locale("ro"), "Romanian");
        // Disabled: rw (Kinyarwanda), rn (Kirundi)

        LANGUAGE_MAP.put(new Locale("sk"), "Slovak");
        LANGUAGE_MAP.put(new Locale("sv"), "Swedish");
        LANGUAGE_MAP.put(new Locale("sl"), "Slovenian");
        LANGUAGE_MAP.put(new Locale("sm"), "Samoan");
        LANGUAGE_MAP.put(new Locale("sn"), "Shona");
        LANGUAGE_MAP.put(new Locale("so"), "Somali");
        LANGUAGE_MAP.put(new Locale("sq"), "Albanian");
        LANGUAGE_MAP.put(new Locale("sr"), "Serbian");
        LANGUAGE_MAP.put(new Locale("sw"), "Swahili");
        // Disabled: sa (Sanskrit), sd (Sindhi), sg (Sangho), sh (Serbo-Croatian),
        //           si (Sinhalese), su (Sundanese), ss (Siswati), st (Sesotho)

        LANGUAGE_MAP.put(new Locale("tg"), "Tajik");
        LANGUAGE_MAP.put(new Locale("th"), "Thai");
        LANGUAGE_MAP.put(new Locale("tk"), "Turkmen");
        LANGUAGE_MAP.put(new Locale("tl"), "Tagalog");
        LANGUAGE_MAP.put(new Locale("to"), "Tonga");
        LANGUAGE_MAP.put(new Locale("tr"), "Turkish");
        LANGUAGE_MAP.put(new Locale("tt"), "Tatar");
        // Disabled: ts (Tsonga), ti (Tigrinya), tn (Setswana), tw (Twi),
        //           ta (Tamil), te (Telugu)

        LANGUAGE_MAP.put(new Locale("ug"), "Uighur");
        LANGUAGE_MAP.put(new Locale("uk"), "Ukrainian");
        LANGUAGE_MAP.put(new Locale("uz"), "Uzbek");
        // Disabled: ur (Urdu)
        // Disabled: vi (Vietnamese), vo (Volapuk)

        LANGUAGE_MAP.put(new Locale("wo"), "Wolof");
        LANGUAGE_MAP.put(new Locale("xh"), "Xhosa");
        LANGUAGE_MAP.put(new Locale("yi"), "Yiddish");
        // Disabled: yo (Yoruba)

        LANGUAGE_MAP.put(new Locale("zu"), "Zulu");
        LANGUAGE_MAP.put(new Locale("zh"), "Chinese");
        // Disabled: za (Zhuang)

        TEXTTYPE_MAP.put(OcrTextType.NORMAL, "Normal");
        TEXTTYPE_MAP.put(OcrTextType.TYPEWRITER, "Typewriter");
        TEXTTYPE_MAP.put(OcrTextType.MATRIX, "Matrix");
        TEXTTYPE_MAP.put(OcrTextType.OCR_A, "OCR_A");
        TEXTTYPE_MAP.put(OcrTextType.OCR_B, "OCR_B");
        TEXTTYPE_MAP.put(OcrTextType.MICR_E13B, "MICR_E13B");
        TEXTTYPE_MAP.put(OcrTextType.GOTHIC, "Gothic");

        FORMAT_MAP.put(OcrFormat.DOC, "MSWord");
        FORMAT_MAP.put(OcrFormat.HTML, "HTML");
        // XHTML shares the "HTML" name with plain HTML.
        FORMAT_MAP.put(OcrFormat.XHTML, "HTML");
        FORMAT_MAP.put(OcrFormat.PDF, "PDF");
        FORMAT_MAP.put(OcrFormat.PDFA, "PDFA");
        FORMAT_MAP.put(OcrFormat.XML, "XML");
        FORMAT_MAP.put(OcrFormat.TXT, "Text");

        PRIORITY_MAP.put(OcrPriority.HIGH, "High");
        PRIORITY_MAP.put(OcrPriority.ABOVENORMAL, "AboveNormal");
        PRIORITY_MAP.put(OcrPriority.NORMAL, "Normal");
        PRIORITY_MAP.put(OcrPriority.BELOWNORMAL, "BelowNormal");
        PRIORITY_MAP.put(OcrPriority.LOW, "Low");

        // Note: BEST is mapped to the name "Thorough", not "Best".
        QUALITY_MAP.put(OcrQuality.BEST, "Thorough");
        QUALITY_MAP.put(OcrQuality.BALANCED, "Balanced");
        QUALITY_MAP.put(OcrQuality.FAST, "Fast");
    }

    /**
     * Returns the language name registered for the given locale.
     *
     * <p>The table is keyed by language-only locales. If the exact locale is not
     * registered (for example {@code Locale.GERMANY}, which carries a country),
     * the lookup is retried with only the locale's language code, so that
     * {@code de_DE} resolves to the same name as {@code de}.
     *
     * @param locale the locale to translate, may be {@code null}
     * @return the mapped language name, or {@code null} if {@code locale} is
     *         {@code null} or its language is not registered
     */
    public static String getLanguage(Locale locale) {
        if (locale == null) {
            return null;
        }
        String languageName = LANGUAGE_MAP.get(locale);
        if (languageName == null) {
            // Strip country/variant and retry; the keys are language-only locales.
            languageName = LANGUAGE_MAP.get(new Locale(locale.getLanguage()));
        }
        return languageName;
    }

    /**
     * Returns the name registered for the given text type.
     *
     * @param textType the text type to translate
     * @return the mapped name, or {@code null} if none is registered
     */
    public static String getTextType(OcrTextType textType) {
        return TEXTTYPE_MAP.get(textType);
    }

    /**
     * Returns the name registered for the given output format.
     *
     * @param format the output format to translate
     * @return the mapped name, or {@code null} if none is registered
     */
    public static String getOutputFormat(OcrFormat format) {
        return FORMAT_MAP.get(format);
    }

    /**
     * Returns the name registered for the given priority.
     *
     * @param priority the priority to translate
     * @return the mapped name, or {@code null} if none is registered
     */
    public static String getPriority(OcrPriority priority) {
        return PRIORITY_MAP.get(priority);
    }

    /**
     * Returns the name registered for the given quality level.
     *
     * @param quality the quality level to translate
     * @return the mapped name, or {@code null} if none is registered
     */
    public static String getQuality(OcrQuality quality) {
        return QUALITY_MAP.get(quality);
    }
}