package org.marketcetera.util.unicode;

import java.nio.charset.Charset;
import org.apache.commons.lang.CharEncoding;
import org.marketcetera.util.except.I18NException;
import org.marketcetera.util.log.I18NBoundMessage1P;
import org.marketcetera.util.misc.ClassVersion;

/**
 * An enumeration of the Unicode charsets, each constant being a thin
 * wrapper around the corresponding {@link Charset}.
 *
 * <h4>Notes:</h4>
 *
 * <p>When a string is encoded into a byte array with the generic
 * (neither LE nor BE) UTF-16 charset, Java inserts a BOM; it does not
 * do so for the generic UTF-32, or for UTF-8. Because of this
 * inconsistency, other classes in this package avoid the generic
 * UTF-16 and UTF-32, and use the LE/BE variants instead: those never
 * insert a BOM (and, upon decoding, ignore one if present).</p>
 *
 * <p>Since Java 5, strings are internally stored in UTF-16, not
 * UCS-2, so they can represent code points above 0xFFFF.</p>
 *
 * <p>UTF-32 encodings may not be supported on all platforms. A
 * constant whose charset is unavailable is still created: a warning
 * is logged, {@link #getCharset()} returns null, and the
 * decoding/encoding methods throw {@link I18NException} for non-null
 * input.</p>
 *
 * @author tlerios@marketcetera.com
 * @since 0.6.0
 * @version $Id: UnicodeCharset.java 16154 2012-07-14 16:34:05Z colin $
 */

/* $License$ */

@ClassVersion("$Id: UnicodeCharset.java 16154 2012-07-14 16:34:05Z colin $")
public enum UnicodeCharset
{
    UTF8(CharEncoding.UTF_8),
    UTF16BE(CharEncoding.UTF_16BE),
    UTF16LE(CharEncoding.UTF_16LE),
    UTF16(CharEncoding.UTF_16),
    UTF32BE("UTF-32BE"), //$NON-NLS-1$
    UTF32LE("UTF-32LE"), //$NON-NLS-1$
    UTF32("UTF-32"); //$NON-NLS-1$


    // INSTANCE DATA.

    private final String mName;
    private final Charset mCharset;


    // CONSTRUCTORS.

    /**
     * Creates a new charset with the given name. If the JVM cannot
     * provide a charset by that name, a warning is logged and the
     * wrapped charset is left null.
     *
     * @param name The charset name as understood by {@link
     * Charset#forName(String)}.
     */

    UnicodeCharset
        (String name)
    {
        mName=name;
        Charset resolved;
        try {
            resolved=Charset.forName(name);
        } catch (IllegalArgumentException ex) {
            // Covers both an illegal and an unsupported charset name;
            // the constant remains usable, but reports itself as
            // unsupported.
            Messages.UNKNOWN_CHARSET.warn(this,ex,name);
            resolved=null;
        }
        mCharset=resolved;
    }


    // INSTANCE METHODS.

    /**
     * Returns the receiver's name, that is, the charset name passed
     * to the constructor.
     *
     * @return The name.
     */

    public String getName()
    {
        return mName;
    }

    /**
     * Returns the Java charset wrapped by the receiver.
     *
     * @return The charset. It is null if the JVM does not support
     * this charset.
     */

    public Charset getCharset()
    {
        return mCharset;
    }

    /**
     * Checks whether the JVM supports the receiver's charset.
     *
     * @return True if so.
     */

    public boolean isSupported()
    {
        return (mCharset!=null);
    }

    /**
     * Asserts that the JVM supports the receiver's charset.
     *
     * @throws I18NException Thrown if it does not.
     */

    public void assertSupported()
        throws I18NException
    {
        if (isSupported()) {
            return;
        }
        throw new I18NException
            (new I18NBoundMessage1P
             (Messages.UNKNOWN_CHARSET,getName()));
    }

    /**
     * Decodes the given portion of the given byte array using the
     * receiver's charset, and returns the result.
     *
     * @param data The byte array, which may be null.
     * @param offset The starting point for decoding.
     * @param length The number of bytes to decode.
     *
     * @return The decoded string; it is null if the given byte array
     * is null.
     *
     * @throws I18NException Thrown if the receiver is not a supported
     * JVM charset (and the given byte array is not null).
     */

    public String decode
        (byte[] data,
         int offset,
         int length)
        throws I18NException
    {
        // A null input is passed through before the support check.
        if (data!=null) {
            assertSupported();
            return new String(data,offset,length,mCharset);
        }
        return null;
    }

    /**
     * Decodes the given byte array, in its entirety, using the
     * receiver's charset, and returns the result.
     *
     * @param data The byte array, which may be null.
     *
     * @return The decoded string; it is null if the given byte array
     * is null.
     *
     * @throws I18NException Thrown if the receiver is not a supported
     * JVM charset (and the given byte array is not null).
     */

    public String decode
        (byte[] data)
        throws I18NException
    {
        // A null input is passed through before the support check.
        if (data!=null) {
            assertSupported();
            return new String(data,mCharset);
        }
        return null;
    }

    /**
     * Encodes the given string using the receiver's charset, and
     * returns the result.
     *
     * @param data The string, which may be null.
     *
     * @return The encoded byte array; it is null if the given string
     * is null.
     *
     * @throws I18NException Thrown if the receiver is not a supported
     * JVM charset (and the given string is not null).
     */

    public byte[] encode
        (String data)
        throws I18NException
    {
        // A null input is passed through before the support check.
        if (data!=null) {
            assertSupported();
            return data.getBytes(mCharset);
        }
        return null;
    }
}