package com.cloudhopper.commons.charset;

/*
 * #%L
 * ch-commons-charset
 * %%
 * Copyright (C) 2012 Cloudhopper by Twitter
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.util.HashMap;
import java.util.Locale;

/**
 * Utility class for encoding and decoding between Strings and byte arrays.
 * <p>
 * Charsets are registered under upper-case names in a static map; lookups by
 * name are case-insensitive and locale-independent (see {@link #map(String)}).
 *
 * @author joelauer (twitter: @jjlauer or <a href="http://twitter.com/jjlauer" target=window>http://twitter.com/jjlauer</a>)
 */
public class CharsetUtil {

    /**
     * Registry of charset name (upper-case) to charset implementation.
     * Populated once in the static initializer below.
     */
    public static final HashMap<String,Charset> charsets;

    public static final String NAME_ISO_8859_1 = "ISO-8859-1";
    public static final String NAME_ISO_8859_15 = "ISO-8859-15";
    public static final String NAME_GSM = "GSM";
    public static final String NAME_PACKED_GSM = "PACKED-GSM";
    public static final String NAME_UCS_2 = "UCS-2";
    public static final String NAME_UCS_2LE = "UCS-2LE";
    public static final String NAME_UTF_8 = "UTF-8";
    /** Modified version of UTF-8 designed mostly for serialization and speed.
        The byte arrays it produces are NOT identical to UTF-8 but are safe to use
        for Java-to-Java serialization. */
    public static final String NAME_MODIFIED_UTF8 = "MODIFIED-UTF8";
    // special charset for "Airwide SMSCs" that have a unique GSM mapping
    public static final String NAME_AIRWIDE_IA5 = "AIRWIDE-IA5";
    // special charset for "Vodafone M2" SMSC that has a unique GSM mapping
    public static final String NAME_VFD2_GSM = "VFD2-GSM";
    // special charset for "Vodafone Turkey" SMSC that has a unique GSM mapping
    public static final String NAME_VFTR_GSM = "VFTR-GSM";
    /** Alias for "PACKED-GSM" */
    public static final String NAME_GSM7 = "GSM7";
    /** Alias for "GSM" */
    public static final String NAME_GSM8 = "GSM8";
    /** Alias for "AIRWIDE-IA5" */
    public static final String NAME_AIRWIDE_GSM = "AIRWIDE-GSM";
    // special charset for "T-Mobile Netherlands" SMSC that has a unique GSM mapping
    public static final String NAME_TMOBILENL_GSM = "TMOBILE-NL-GSM";

    public static final Charset CHARSET_ISO_8859_1 = new ISO88591Charset();
    public static final Charset CHARSET_ISO_8859_15 = new ISO885915Charset();
    public static final Charset CHARSET_GSM = new GSMCharset();
    public static final Charset CHARSET_PACKED_GSM = new PackedGSMCharset();
    public static final Charset CHARSET_UCS_2 = new UCS2Charset();
    public static final Charset CHARSET_UCS_2LE = new UCS2LECharset();
    public static final Charset CHARSET_UTF_8 = new UTF8Charset();
    /** Modified version of UTF-8 designed mostly for serialization and speed.
        The byte arrays it produces are NOT identical to UTF-8 but are safe to use
        for Java-to-Java serialization. */
    public static final Charset CHARSET_MODIFIED_UTF8 = new ModifiedUTF8Charset();
    public static final Charset CHARSET_AIRWIDE_IA5 = new AirwideIA5Charset();
    public static final Charset CHARSET_VFD2_GSM = new VFD2GSMCharset();
    public static final Charset CHARSET_VFTR_GSM = new VFTRGSMCharset();
    /** Alias for "PACKED-GSM" */
    public static final Charset CHARSET_GSM7 = CHARSET_PACKED_GSM;
    /** Alias for "GSM" */
    public static final Charset CHARSET_GSM8 = CHARSET_GSM;
    /** Alias for "AIRWIDE-IA5" */
    public static final Charset CHARSET_AIRWIDE_GSM = CHARSET_AIRWIDE_IA5;
    public static final Charset CHARSET_TMOBILENL_GSM = new TMobileNlGSMCharset();

    static {
        // Runs after all CHARSET_* constants above are initialized (textual order).
        charsets = new HashMap<String,Charset>();
        charsets.put(NAME_ISO_8859_1, CHARSET_ISO_8859_1);
        charsets.put(NAME_ISO_8859_15, CHARSET_ISO_8859_15);
        charsets.put(NAME_GSM, CHARSET_GSM);
        charsets.put(NAME_MODIFIED_UTF8, CHARSET_MODIFIED_UTF8);
        charsets.put(NAME_PACKED_GSM, CHARSET_PACKED_GSM);
        charsets.put(NAME_UCS_2, CHARSET_UCS_2);
        charsets.put(NAME_UCS_2LE, CHARSET_UCS_2LE);
        charsets.put(NAME_UTF_8, CHARSET_UTF_8);
        charsets.put(NAME_AIRWIDE_IA5, CHARSET_AIRWIDE_IA5);
        charsets.put(NAME_VFD2_GSM, CHARSET_VFD2_GSM);
        charsets.put(NAME_VFTR_GSM, CHARSET_VFTR_GSM);
        charsets.put(NAME_GSM7, CHARSET_GSM7);
        charsets.put(NAME_GSM8, CHARSET_GSM8);
        charsets.put(NAME_AIRWIDE_GSM, CHARSET_AIRWIDE_GSM);
        charsets.put(NAME_TMOBILENL_GSM, CHARSET_TMOBILENL_GSM);
    }

    /**
     * Returns the registry of supported charsets keyed by upper-case name.
     * <p>
     * This is the same map instance used internally for lookups (not a copy),
     * so any modification made by the caller affects subsequent lookups.
     *
     * @return the charset registry map
     */
    public static HashMap<String,Charset> getCharsetMap() {
        return charsets;
    }

    /**
     * Looks up a charset by name, ignoring case.
     * <p>
     * The name is upper-cased with {@link Locale#ROOT} so the lookup does not
     * depend on the JVM default locale. With a locale-sensitive upper-casing
     * (e.g. Turkish, where "i" becomes dotted capital "İ") names such as
     * "airwide-ia5" would never match their registered ASCII keys.
     *
     * @param charsetName the charset name (such as "GSM" or "utf-8"); may be null
     * @return the matching charset, or null if the name is null or not registered
     */
    public static Charset map(String charsetName) {
        if (charsetName == null) {
            // treat a missing name like an unsupported one rather than throwing
            return null;
        }
        return charsets.get(charsetName.toUpperCase(Locale.ROOT));
    }

    /**
     * Encodes a character sequence into bytes using the charset with the given name.
     *
     * @param str0 the characters to encode
     * @param charsetName the name of the charset to use (case-insensitive)
     * @return the encoded bytes, or null if the charset name is not supported
     */
    public static byte[] encode(CharSequence str0, String charsetName) {
        Charset charset = map(charsetName);
        if (charset == null) {
            return null;
        }
        return encode(str0, charset);
    }

    /**
     * Encodes a character sequence into bytes using the given charset.
     *
     * @param str0 the characters to encode
     * @param charset the charset to encode with; must not be null
     * @return the result of {@code charset.encode(str0)}
     */
    public static byte[] encode(CharSequence str0, Charset charset) {
        return charset.encode(str0);
    }

    /**
     * Decodes bytes into the supplied buffer using the charset with the given name.
     * If the charset name is not supported, this method does nothing and the
     * buffer is left untouched.
     *
     * @param bytes the bytes to decode
     * @param buffer the buffer handed to the charset to receive the decoded output
     * @param charsetName the name of the charset to use (case-insensitive)
     */
    public static void decode(byte[] bytes, StringBuilder buffer, String charsetName) {
        Charset charset = map(charsetName);
        if (charset == null) {
            // do nothing
            return;
        }
        decode(bytes, buffer, charset);
    }

    /**
     * Decodes bytes into the supplied buffer using the given charset.
     *
     * @param bytes the bytes to decode
     * @param buffer the buffer handed to the charset to receive the decoded output
     * @param charset the charset to decode with; must not be null
     */
    public static void decode(byte[] bytes, StringBuilder buffer, Charset charset) {
        charset.decode(bytes, buffer);
    }

    /**
     * Decodes bytes into a String using the charset with the given name.
     *
     * @param bytes the bytes to decode
     * @param charsetName the name of the charset to use (case-insensitive)
     * @return the decoded String, or null if the charset name is not supported
     */
    public static String decode(byte[] bytes, String charsetName) {
        Charset charset = map(charsetName);
        if (charset == null) {
            return null;
        }
        return decode(bytes, charset);
    }

    /**
     * Decodes bytes into a String using the given charset.
     *
     * @param bytes the bytes to decode
     * @param charset the charset to decode with; must not be null
     * @return the result of {@code charset.decode(bytes)}
     */
    public static String decode(byte[] bytes, Charset charset) {
        return charset.decode(bytes);
    }

    /**
     * Normalizes a character sequence using the charset with the given name.
     * Unlike the name-based encode/decode methods, an unsupported charset name
     * is reported with an exception rather than a null result.
     *
     * @param str0 the characters to normalize
     * @param charsetName the name of the charset to use (case-insensitive)
     * @return the result of the charset's {@code normalize} for {@code str0}
     * @throws IllegalArgumentException if the charset name is not supported
     */
    public static String normalize(CharSequence str0, String charsetName) {
        Charset charset = map(charsetName);
        if (charset == null) {
            throw new IllegalArgumentException("Unsupported charset [" + charsetName + "]");
        }
        return normalize(str0, charset);
    }

    /**
     * Normalizes a character sequence using the given charset.
     *
     * @param str0 the characters to normalize
     * @param charset the charset to normalize with; must not be null
     * @return the result of {@code charset.normalize(str0)}
     */
    public static String normalize(CharSequence str0, Charset charset) {
        return charset.normalize(str0);
    }

}