/*
 * CATMA Computer Aided Text Markup and Analysis
 *
 * Copyright (C) 2009 University Of Hamburg
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.catma.document.source;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import de.catma.ExceptionHandler;

/**
 * Builds and provides a mapping of geographical regions to languages to character
 * encodings based on the resource file CharsetLanguageInfo.properties.<br>
 * Character encodings which are not mentioned in that file are listed under the
 * category "OTHERS".<br>
 * <br>
 * Each property is keyed by a charset name as reported by
 * {@link Charset#availableCharsets()}. A value of the form
 * <code>language,region</code> puts the charset into the region/language mapping;
 * a value without a comma is used as a plain category name.<br>
 * <br>
 * This class is a singleton.
 *
 * @author marco.petris@web.d
 *
 */
public enum CharsetLanguageInfo {
	SINGLETON;

	// category for charsets the resource file says nothing about
	private static final String OTHERS = "OTHERS";

	// classpath location of the resource which contains the mapping information
	private static final String RESOURCE_PATH =
		"de/catma/document/source/resources/CharsetLanguageInfo.properties";

	// resource which contains the mapping information
	private final Properties charsetLanguageInfoProperties;

	// region->language->charsetList
	private final Map<String,Map<String,List<Charset>>> regionLanguageCharsetMapping;

	// encodings for which we have no information concerning region and language
	private final Map<String,List<Charset>> simpleCategoryCharsetMapping;

	/**
	 * Builds the mappings.
	 * <p>
	 * If the resource file cannot be found or read, the problem is logged and
	 * all available charsets end up in the "OTHERS" category instead of
	 * failing the initialisation of this enum.
	 */
	private CharsetLanguageInfo() {
		regionLanguageCharsetMapping =
			new HashMap<String, Map<String,List<Charset>>>();
		simpleCategoryCharsetMapping = new HashMap<String, List<Charset>>();
		simpleCategoryCharsetMapping.put( OTHERS, new ArrayList<Charset>() );

		charsetLanguageInfoProperties = loadCharsetLanguageInfo();

		// we loop through the available charsets and try to apply them to the mapping
		for( Map.Entry<String,Charset> entry : Charset.availableCharsets().entrySet() ) {
			Charset charset = entry.getValue();
			String languageInfo =
				charsetLanguageInfoProperties.getProperty( entry.getKey() );

			// do we have infos about this charset?
			if( ( languageInfo == null ) || languageInfo.equals( "" ) ) {
				// no infos for that charset, it goes to the 'others' category
				simpleCategoryCharsetMapping.get( OTHERS ).add( charset );
				continue;
			}

			// yes, so we try to extract language and region
			String[] infos = languageInfo.trim().split( "," );
			if( infos.length >= 2 ) {
				// add this charset to its region/language
				String language = infos[0];
				String region = infos[1];
				charsetsFor( languagesFor( region ), language ).add( charset );
			}
			else {
				// no region/language information,
				// we use what we have as a category
				charsetsFor( simpleCategoryCharsetMapping, infos[0] ).add( charset );
			}
		}
	}

	/**
	 * Loads the mapping resource from the classpath.
	 *
	 * @return the loaded properties; empty if the resource is missing or unreadable
	 */
	private static Properties loadCharsetLanguageInfo() {
		Properties properties = new Properties();
		InputStream is = openResource();

		if( is == null ) {
			// Properties.load(null) would throw a NullPointerException and
			// break the initialisation of this enum, so we log and go on
			// with an empty property set
			ExceptionHandler.log(
				new IOException( "resource not found: " + RESOURCE_PATH ) );
			return properties;
		}

		try {
			properties.load( is );
		}
		catch( IOException e ) {
			ExceptionHandler.log( e );
		}
		finally {
			try {
				is.close();
			}
			catch( IOException ignored ) {
				// not of importance, the content has been processed already
			}
		}
		return properties;
	}

	/**
	 * Opens the mapping resource, first via the context class loader of the
	 * current thread and, if that yields nothing, via the class loader of
	 * this enum.
	 *
	 * @return the resource stream or <code>null</code> if the resource cannot be found
	 */
	private static InputStream openResource() {
		InputStream is = null;

		// the context class loader is allowed to be null
		ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
		if( contextLoader != null ) {
			is = contextLoader.getResourceAsStream( RESOURCE_PATH );
		}

		if( is == null ) {
			ClassLoader ownLoader = CharsetLanguageInfo.class.getClassLoader();
			if( ownLoader != null ) {
				is = ownLoader.getResourceAsStream( RESOURCE_PATH );
			}
		}
		return is;
	}

	/**
	 * @param region the region to look up
	 * @return the language->charset mapping of the region, created if not present
	 */
	private Map<String,List<Charset>> languagesFor( String region ) {
		Map<String,List<Charset>> languages =
			regionLanguageCharsetMapping.get( region );
		if( languages == null ) {
			languages = new HashMap<String, List<Charset>>();
			regionLanguageCharsetMapping.put( region, languages );
		}
		return languages;
	}

	/**
	 * @param mapping the mapping to search
	 * @param key the language or category to look up
	 * @return the charset list stored under the key, created if not present
	 */
	private static List<Charset> charsetsFor(
			Map<String,List<Charset>> mapping, String key ) {
		List<Charset> charsets = mapping.get( key );
		if( charsets == null ) {
			charsets = new ArrayList<Charset>();
			mapping.put( key, charsets );
		}
		return charsets;
	}

	/**
	 * The returned map is the internal map of this singleton, not a copy.
	 *
	 * @return a mapping region->language->charset
	 */
	public Map<String, Map<String, List<Charset>>> getRegionLanguageCharsetMapping() {
		return regionLanguageCharsetMapping;
	}

	/**
	 * The returned map is the internal map of this singleton, not a copy.
	 * It always contains the category "OTHERS".
	 *
	 * @return a mapping category->charset (these charsets came without language/region infos)
	 */
	public Map<String, List<Charset>> getCategoryCharsetMapping() {
		return simpleCategoryCharsetMapping;
	}
}