/*******************************************************************************
* Gisgraphy Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*
* Copyright 2008 Gisgraphy project
* David Masclet <davidmasclet@gisgraphy.com>
*
*
*******************************************************************************/
package com.gisgraphy.helper;
import java.io.UnsupportedEncodingException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gisgraphy.domain.Constants;
/**
 * Static helpers for character-encoding concerns: re-encoding strings,
 * forcing the JVM encoding system properties, and stripping accents from
 * Latin characters.
 *
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public class EncodingHelper {

    protected static final Logger logger = LoggerFactory.getLogger(EncodingHelper.class);

    /**
     * Useful for Windows only: this method is a workaround for encoding
     * problems on Windows.<br>
     * Any suggestions are welcome.
     * <p>
     * The string is encoded to bytes with {@link Constants#CHARSET} and those
     * bytes are then decoded with the platform default charset.
     * NOTE(review): decoding with the platform default looks intentional for
     * the Windows workaround, but it means the result differs from the input
     * whenever the default charset is not {@link Constants#CHARSET} - confirm
     * against callers before changing it.
     *
     * @param string
     *            the string to convert, must not be {@code null}
     * @return the re-encoded string
     * @throws RuntimeException
     *             if {@link Constants#CHARSET} is not supported by the JVM;
     *             the original exception is attached as the cause
     */
    public static String toUTF8(String string) {
        try {
            return new String(string.getBytes(Constants.CHARSET));
        } catch (UnsupportedEncodingException e) {
            // keep the cause so the unsupported charset name is not lost
            throw new RuntimeException("can not change String Encoding", e);
        }
    }

    /**
     * Sets the {@code file.encoding} and {@code sun.jnu.encoding} system
     * properties to {@link Constants#CHARSET} (expected to be UTF-8).
     * NOTE(review): the JVM may already have read these properties at
     * startup, in which case changing them here would not affect the default
     * charset already in use - verify on the target JVM.
     */
    public static void setJVMEncodingToUTF8() {
        setSystemProperty("file.encoding", Constants.CHARSET);
        setSystemProperty("sun.jnu.encoding", Constants.CHARSET);
    }

    /**
     * Sets the system property {@code name} to {@code value} unless it
     * already has that value, logging what was done in either case.
     *
     * @param name
     *            the name of the system property
     * @param value
     *            the value the property must have
     */
    private static void setSystemProperty(String name, String value) {
        // read the property once so the test and the log lines agree
        String current = System.getProperty(name);
        if (current == null || !current.equals(value)) {
            logger.info("change system property from {} to {}", current, value);
            System.setProperty(name, value);
            logger.info("System property {} is now : {}", name, System.getProperty(name));
        } else {
            // log the property that was actually checked, not always file.encoding
            logger.info("{}={}", name, current);
        }
    }

    /**
     * Replaces accented characters in a String by their unaccented
     * equivalents. Ligatures and special letters are expanded to several
     * characters (for instance {@code Æ} becomes {@code AE} and {@code ß}
     * becomes {@code ss}). Characters that are not in the mapping are copied
     * unchanged.
     * <p>
     * Source code from Lucene ISOLatin1AccentFilter.
     *
     * @param input
     *            the string to process, must not be {@code null}
     * @return a new string without the accented characters handled by the
     *         mapping
     */
    public static final String removeAccents(String input) {
        // local buffer: no synchronization needed, so StringBuilder is enough
        final StringBuilder output = new StringBuilder(input.length());
        for (int i = 0; i < input.length(); i++) {
            final char c = input.charAt(i);
            switch (c) {
            case '\u00C0': // À
            case '\u00C1': // Á
            case '\u00C2': // Â
            case '\u00C3': // Ã
            case '\u00C4': // Ä
            case '\u00C5': // Å
                output.append("A");
                break;
            case '\u00C6': // Æ
                output.append("AE");
                break;
            case '\u00C7': // Ç
                output.append("C");
                break;
            case '\u00C8': // È
            case '\u00C9': // É
            case '\u00CA': // Ê
            case '\u00CB': // Ë
                output.append("E");
                break;
            case '\u00CC': // Ì
            case '\u00CD': // Í
            case '\u00CE': // Î
            case '\u00CF': // Ï
                output.append("I");
                break;
            case '\u00D0': // Ð
                output.append("D");
                break;
            case '\u00D1': // Ñ
                output.append("N");
                break;
            case '\u00D2': // Ò
            case '\u00D3': // Ó
            case '\u00D4': // Ô
            case '\u00D5': // Õ
            case '\u00D6': // Ö
            case '\u00D8': // Ø
                output.append("O");
                break;
            case '\u0152': // Œ
                output.append("OE");
                break;
            case '\u00DE': // Þ
                output.append("TH");
                break;
            case '\u00D9': // Ù
            case '\u00DA': // Ú
            case '\u00DB': // Û
            case '\u00DC': // Ü
                output.append("U");
                break;
            case '\u00DD': // Ý
            case '\u0178': // Ÿ
                output.append("Y");
                break;
            case '\u00E0': // à
            case '\u00E1': // á
            case '\u00E2': // â
            case '\u00E3': // ã
            case '\u00E4': // ä
            case '\u00E5': // å
                output.append("a");
                break;
            case '\u00E6': // æ
                output.append("ae");
                break;
            case '\u00E7': // ç
                output.append("c");
                break;
            case '\u00E8': // è
            case '\u00E9': // é
            case '\u00EA': // ê
            case '\u00EB': // ë
                output.append("e");
                break;
            case '\u00EC': // ì
            case '\u00ED': // í
            case '\u00EE': // î
            case '\u00EF': // ï
                output.append("i");
                break;
            case '\u00F0': // ð
                output.append("d");
                break;
            case '\u00F1': // ñ
                output.append("n");
                break;
            case '\u00F2': // ò
            case '\u00F3': // ó
            case '\u00F4': // ô
            case '\u00F5': // õ
            case '\u00F6': // ö
            case '\u00F8': // ø
                output.append("o");
                break;
            case '\u0153': // œ
                output.append("oe");
                break;
            case '\u00DF': // ß
                output.append("ss");
                break;
            case '\u00FE': // þ
                output.append("th");
                break;
            case '\u00F9': // ù
            case '\u00FA': // ú
            case '\u00FB': // û
            case '\u00FC': // ü
                output.append("u");
                break;
            case '\u00FD': // ý
            case '\u00FF': // ÿ
                output.append("y");
                break;
            default:
                // not an accented character handled here: keep it as is
                output.append(c);
                break;
            }
        }
        return output.toString();
    }
}