package org.marketcetera.util.unicode;
import java.util.EnumSet;
import org.marketcetera.util.except.I18NException;
import org.marketcetera.util.misc.ClassVersion;
/**
* A list of signature/charset pairs. This list defines a sequence of
* signatures that we attempt to match (in order) against a byte array
* header, and the associated charset we use if a match is found. Most
* instances of this class correspond to the entries in Table 2 of <a
* href="http://www.icu-project.org/docs/papers/forms_of_unicode/">this
* reference document</a>.
*
* @author tlerios@marketcetera.com
* @since 0.6.0
* @version $Id: Serialization.java 16154 2012-07-14 16:34:05Z colin $
*/
/* $License$ */
@ClassVersion("$Id: Serialization.java 16154 2012-07-14 16:34:05Z colin $")
public enum Serialization
{
    // NOTE(review): the SignatureCharset constants used below appear
    // to be named <signature>_<charset>, with NONE presumably meaning
    // "no signature" -- confirm against SignatureCharset. The
    // per-constant comments here only state which pairs are listed
    // and in what order.

    /** Single pair: {@link SignatureCharset#UTF8_UTF8}. */
    UTF8(SignatureCharset.UTF8_UTF8),

    /** Single pair: {@link SignatureCharset#NONE_UTF8}. */
    UTF8N(SignatureCharset.NONE_UTF8),

    /**
     * Three pairs, in order: {@link SignatureCharset#NONE_UTF16BE},
     * {@link SignatureCharset#UTF16BE_UTF16BE},
     * {@link SignatureCharset#UTF16LE_UTF16LE}.
     */
    UTF16(SignatureCharset.NONE_UTF16BE,
          SignatureCharset.UTF16BE_UTF16BE,
          SignatureCharset.UTF16LE_UTF16LE),

    /** Single pair: {@link SignatureCharset#NONE_UTF16BE}. */
    UTF16BE(SignatureCharset.NONE_UTF16BE),

    /** Single pair: {@link SignatureCharset#NONE_UTF16LE}. */
    UTF16LE(SignatureCharset.NONE_UTF16LE),

    /**
     * Three pairs, in order: {@link SignatureCharset#NONE_UTF32BE},
     * {@link SignatureCharset#UTF32BE_UTF32BE},
     * {@link SignatureCharset#UTF32LE_UTF32LE}.
     */
    UTF32(SignatureCharset.NONE_UTF32BE,
          SignatureCharset.UTF32BE_UTF32BE,
          SignatureCharset.UTF32LE_UTF32LE),

    /** Single pair: {@link SignatureCharset#NONE_UTF32BE}. */
    UTF32BE(SignatureCharset.NONE_UTF32BE),

    /** Single pair: {@link SignatureCharset#NONE_UTF32LE}. */
    UTF32LE(SignatureCharset.NONE_UTF32LE),

    /** Single pair: {@link SignatureCharset#UTF16BE_UTF16BE}. */
    UTF16BE_REQ(SignatureCharset.UTF16BE_UTF16BE),

    /** Single pair: {@link SignatureCharset#UTF16LE_UTF16LE}. */
    UTF16LE_REQ(SignatureCharset.UTF16LE_UTF16LE),

    /** Single pair: {@link SignatureCharset#UTF32BE_UTF32BE}. */
    UTF32BE_REQ(SignatureCharset.UTF32BE_UTF32BE),

    /** Single pair: {@link SignatureCharset#UTF32LE_UTF32LE}. */
    UTF32LE_REQ(SignatureCharset.UTF32LE_UTF32LE);


    // INSTANCE DATA.

    /**
     * The receiver's pairs, in the order they were declared. This
     * array is never handed out directly; see
     * {@link #getSignatureCharsets()}.
     */
    private final SignatureCharset[] mSignatureCharsets;


    // CONSTRUCTORS.

    /**
     * Creates a new serialization with the given signature/charset
     * pairs (one or more).
     *
     * @param signatureCharsets The pairs.
     */
    Serialization
        (SignatureCharset... signatureCharsets)
    {
        // No copy is needed here: enum constructors are only reachable
        // from the constant declarations above, each of which supplies
        // a freshly created array.
        mSignatureCharsets=signatureCharsets;
    }


    // CLASS METHODS.

    /**
     * Checks whether any of the signatures among the pairs of the
     * given serializations matches the header of the given byte
     * array, and returns the matching signature/charset pair.
     *
     * @param candidates The serializations. It must not be null.
     * @param data The byte array.
     *
     * @return The matching signature/charset pair, or null if no
     * candidate contains a matching pair. The choice among several
     * matching signatures is delegated to
     * <code>Signature.getPrefixMatch</code>: if more than one
     * signature/charset pair is a match, the one with the longest
     * signature is returned; and if there is more than one with the
     * same length, the first such match is returned.
     */
    public static SignatureCharset getPrefixMatch
        (Serialization[] candidates,
         byte[] data)
    {
        // Collect the distinct signatures of all candidate pairs.
        EnumSet<Signature> signatures=EnumSet.noneOf(Signature.class);
        for (Serialization serialization:candidates) {
            for (SignatureCharset sc:serialization.mSignatureCharsets) {
                signatures.add(sc.getSignature());
            }
        }

        // Let Signature pick the winning signature for this header.
        Signature match=Signature.getPrefixMatch
            (signatures.toArray(Signature.EMPTY_ARRAY),data);

        // Map the winning signature back to the first pair (in
        // candidate order) that carries it.
        for (Serialization serialization:candidates) {
            for (SignatureCharset sc:serialization.mSignatureCharsets) {
                if (sc.getSignature()==match) {
                    return sc;
                }
            }
        }
        return null;
    }

    /**
     * Decodes the given byte array using the charset paired to a
     * signature (among the pairs of the given serializations) that
     * matches the array header, and returns the result.
     *
     * @param candidates The serializations. It is not examined when
     * the given byte array is null.
     * @param data The byte array, which may be null.
     *
     * @return The decoded string; it is null if the given byte array
     * is null.
     *
     * @throws I18NException Thrown if no match can be found, or if
     * the JVM does not support the charset of the matching
     * signature/charset pair.
     */
    public static String decode
        (Serialization[] candidates,
         byte[] data)
        throws I18NException
    {
        if (data==null) {
            return null;
        }
        SignatureCharset sc=getPrefixMatch(candidates,data);
        if (sc==null) {
            throw new I18NException(Messages.NO_SIGNATURE_MATCHES);
        }
        return sc.decode(data);
    }


    // INSTANCE METHODS.

    /**
     * Returns the receiver's signature/charset pairs.
     *
     * @return A new array holding the pairs, in matching order.
     * Changes made to the returned array do not affect the receiver.
     */
    public SignatureCharset[] getSignatureCharsets()
    {
        // Enum constants are shared JVM-wide; returning the internal
        // array would let any caller alter the pairs for everyone.
        return mSignatureCharsets.clone();
    }
}