package org.marketcetera.util.unicode; import org.apache.commons.lang.ArrayUtils; import org.marketcetera.util.misc.ClassVersion; /** * A byte stream signature. It appears in the beginning (header) of a * byte stream, and identifies the charset necessary to interpret the * remaining bytes as text. * * @author tlerios@marketcetera.com * @since 0.6.0 * @version $Id: Signature.java 16154 2012-07-14 16:34:05Z colin $ */ /* $License$ */ @ClassVersion("$Id: Signature.java 16154 2012-07-14 16:34:05Z colin $") public enum Signature { NONE(ArrayUtils.EMPTY_BYTE_ARRAY), UTF8(new byte[] {(byte)0xEF,(byte)0xBB,(byte)0xBF}), UTF16BE(new byte[] {(byte)0xFE,(byte)0xFF}), UTF16LE(new byte[] {(byte)0xFF,(byte)0xFE}), UTF32BE(new byte[] {(byte)0x00,(byte)0x00,(byte)0xFE,(byte)0xFF}), UTF32LE(new byte[] {(byte)0xFF,(byte)0xFE,(byte)0x00,(byte)0x00}); // CLASS DATA. public static final Signature[] EMPTY_ARRAY=new Signature[0]; // INSTANCE DATA. private final byte[] mMark; // CONSTRUCTORS. /** * Creates a new signature with the given BOM. * * @param mark The BOM. */ Signature (byte[] mark) { mMark=mark; } // CLASS METHODS. /** * Returns the maximum length of any signature BOM. * * @return The length. */ public static int getLongestLength() { int max=-1; for (Signature signature:values()) { int len=signature.getLength(); if (len>max) { max=len; } } return max; } /** * Checks whether any of the given signatures matches the header * of the given byte array. * * @param candidates The signatures. * @param data The byte array. * * @return The matching signature, or null if no candidate is a * match. If more than one candidate is a match, the one with the * longest signature is returned; and if there is more than one * with the same length, the first such match is returned. */ public static Signature getPrefixMatch (Signature[] candidates, byte[] data) { int max=-1; Signature result=null; for (Signature signature:candidates) { if (signature.prefixMatch(data)) { int len=signature.getLength(); if (len>max) { result=signature; max=len; } } } return result; } // INSTANCE METHODS. /** * Returns the receiver's BOM. * * @return The BOM. */ public byte[] getMark() { return mMark; } /** * Returns the receiver's BOM length. * * @return The BOM length. */ public int getLength() { return getMark().length; } /** * Checks whether the receiver's BOM matches the header of the * given byte array. * * @param data The byte array. * * @return True if so. */ public boolean prefixMatch (byte[] data) { if (data.length<getLength()) { return false; } for (int i=0;i<getLength();i++) { if (data[i]!=getMark()[i]) { return false; } } return true; } }