/*
* Copyright 2008-2014 the original author or authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kaleidofoundry.core.lang;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
/**
* Charsets enum for : <br/>
* <ul>
* <li>UTF-8</li>
* <li>UTF-16</li>
* <li>UTF-16BE</li>
* <li>UTF-16LE</li>
* <li>ISO-8859-1</li>
* <li>US-ASCII</li>
* </ul>
* Use getCode() to have iso encoding code <br/>
* <br/>
*
* @author jraduget
*/
public enum Charsets {
/**
* UTF-8 is an 8-bit encoding scheme. Characters from the English-language alphabet are all encoded using an 8-bit byte. Characters
* for
* other languages are encoded using 2, 3, or even 4 bytes. UTF-8 therefore produces compact documents for the English language, but
* for
* other languages, documents tend to be half again as large as they would be if they used UTF-16. If the majority of a document's
* text
* is
* in a Western European language, then UTF-8 is generally a good choice because it allows for internationalization while still
* minimizing
* the space required for encoding.
*/
UTF_8("UTF-8"),
/**
* UTF-16 is a 16-bit encoding scheme. It is large enough to encode all the characters from all the alphabets in the world. It uses 16
* bits for most characters but includes 32-bit characters for ideogram-based languages such as Chinese. A Western European-language
* document that uses UTF-16 will be twice as large as the same document encoded using UTF-8. But documents written in far Eastern
* languages will be far smaller using UTF-16.
* UTF-16 Sixteen-bit UCS Transformation Format, byte order identified by an optional byte-order mark
*/
UTF_16("UTF-16"),
/** UTF-16BE Sixteen-bit UCS Transformation Format, big-endian byte order */
UTF_16BE("UTF-16BE"),
/** UTF-16LE Sixteen-bit UCS Transformation Format, little-endian byte order */
UTF_16BLE("UTF-16LE"),
/**
* ISO-8859-1 is the character set for Western European languages. It's an 8-bit encoding scheme in which every encoded character
* takes
* exactly 8 bits. (With the remaining character sets, on the other hand, some codes are reserved to signal the start of a multibyte
* character.)
*/
ISO_8859_1("ISO-8859-1"),
/**
* US-ASCII is a 7-bit character set and encoding that covers the English-language alphabet. It is not large enough to cover the
* characters used in other languages, however, so it is not very useful for internationalization.
* US-ASCII Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set
*/
US_ASCII("US-ASCII");
private final String code;
private final Charset charset;
Charsets(final String code) {
this.code = code;
charset = Charset.forName(code);
}
/**
* @return iso code of the encoding or charset
*/
public String getCode() {
return code;
}
/**
* @return new instance of the current charset code
*/
public Charset getCharset() {
return charset;
}
/**
* @param message text to encode
* @return encoded buffer
*/
public ByteBuffer encode(final String message) {
return charset.encode(message);
}
/**
* @param buffer buffer to decode
* @return decoded buffer
*/
public CharBuffer decode(final ByteBuffer buffer) {
return charset.decode(buffer);
}
}