package com.mpatric.mp3agic; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.util.Arrays; public class EncodedText { public static final byte TEXT_ENCODING_ISO_8859_1 = 0; public static final byte TEXT_ENCODING_UTF_16 = 1; public static final byte TEXT_ENCODING_UTF_16BE = 2; public static final byte TEXT_ENCODING_UTF_8 = 3; public static final String CHARSET_ISO_8859_1 = "ISO-8859-1"; public static final String CHARSET_UTF_16 = "UTF-16LE"; public static final String CHARSET_UTF_16BE = "UTF-16BE"; public static final String CHARSET_UTF_8 = "UTF-8"; private static final String[] characterSets = { CHARSET_ISO_8859_1, CHARSET_UTF_16, CHARSET_UTF_16BE, CHARSET_UTF_8 }; private static final byte[] textEncodingFallback = {0, 2, 1, 3}; private static final byte[][] boms = { {}, {(byte)0xff, (byte)0xfe}, {(byte) 0xfe, (byte) 0xff}, {} }; private static final byte[][] terminators = { {0}, {0, 0}, {0, 0}, {0} }; private byte[] value; private byte textEncoding; public EncodedText(byte textEncoding, byte[] value) { this.textEncoding = textEncoding; this.value = value; this.stripBomAndTerminator(); } public EncodedText(String string) throws IllegalArgumentException { for (byte textEncoding : textEncodingFallback) { this.textEncoding = textEncoding; value = stringToBytes(string, characterSetForTextEncoding(textEncoding)); if (value != null && this.toString() != null) { this.stripBomAndTerminator(); return; } } throw new IllegalArgumentException("Invalid string, could not find appropriate encoding"); } public EncodedText(String string, byte transcodeToTextEncoding) throws IllegalArgumentException, CharacterCodingException { this(string); setTextEncoding(transcodeToTextEncoding, true); } public EncodedText(byte textEncoding, String string) { this.textEncoding = textEncoding; value = stringToBytes(string, characterSetForTextEncoding(textEncoding)); this.stripBomAndTerminator(); } public EncodedText(byte[] value) { this(textEncodingForBytesFromBOM(value), value); } private static byte textEncodingForBytesFromBOM(byte[] value) { if (value.length >= 2 && value[0] == (byte)0xff && value[1] == (byte)0xfe) { return TEXT_ENCODING_UTF_16; } else if (value.length >= 2 && value[0] == (byte)0xfe && value[1] == (byte)0xff) { return TEXT_ENCODING_UTF_16BE; } else if (value.length >= 3 && (value[0] == (byte)0xef && value[1] == (byte)0xbb && value[2] == (byte)0xbf)) { return TEXT_ENCODING_UTF_8; } else { return TEXT_ENCODING_ISO_8859_1; } } private static String characterSetForTextEncoding(byte textEncoding) { try { return characterSets[textEncoding]; } catch (ArrayIndexOutOfBoundsException e) { throw new IllegalArgumentException("Invalid text encoding " + textEncoding); } } private void stripBomAndTerminator() { int leadingCharsToRemove = 0; if (value.length >= 2 && ((value[0] == (byte)0xfe && value[1] == (byte)0xff) || (value[0] == (byte)0xff && value[1] == (byte)0xfe))) { leadingCharsToRemove = 2; } else if (value.length >= 3 && (value[0] == (byte)0xef && value[1] == (byte)0xbb && value[2] == (byte)0xbf)) { leadingCharsToRemove = 3; } int trailingCharsToRemove = 0; byte[] terminator = terminators[textEncoding]; if (value.length - leadingCharsToRemove >= terminator.length) { boolean haveTerminator = true; for (int i = 0; i < terminator.length; i++) { if (value[value.length - terminator.length + i] != terminator[i]) { haveTerminator = false; break; } } if (haveTerminator) trailingCharsToRemove = terminator.length; } if (leadingCharsToRemove + trailingCharsToRemove > 0) { int newLength = value.length - leadingCharsToRemove - trailingCharsToRemove; byte[] newValue = new byte[newLength]; if (newLength > 0) { System.arraycopy(value, leadingCharsToRemove, newValue, 0, newValue.length); } value = newValue; } } public byte getTextEncoding() { return textEncoding; } public void setTextEncoding(byte textEncoding) throws CharacterCodingException { setTextEncoding(textEncoding, true); } public void setTextEncoding(byte textEncoding, boolean transcode) throws CharacterCodingException { if (this.textEncoding != textEncoding) { CharBuffer charBuffer = bytesToCharBuffer(this.value, characterSetForTextEncoding(this.textEncoding)); byte[] transcodedBytes = charBufferToBytes(charBuffer, characterSetForTextEncoding(textEncoding)); this.textEncoding = textEncoding; this.value = transcodedBytes; } } public byte[] getTerminator() { return terminators[textEncoding]; } public byte[] toBytes() { return toBytes(false, false); } public byte[] toBytes(boolean includeBom) { return toBytes(includeBom, false); } public byte[] toBytes(boolean includeBom, boolean includeTerminator) { characterSetForTextEncoding(textEncoding); // ensured textEncoding is valid int newLength = value.length + (includeBom ? boms[textEncoding].length : 0) + (includeTerminator ? getTerminator().length : 0); if (newLength == value.length) { return value; } else { byte bytes[] = new byte[newLength]; int i = 0; if (includeBom) { byte[] bom = boms[textEncoding]; if (bom.length > 0) { System.arraycopy(boms[textEncoding], 0, bytes, i, boms[textEncoding].length); i += boms[textEncoding].length; } } if (value.length > 0) { System.arraycopy(value, 0, bytes, i, value.length); i += value.length; } if (includeTerminator) { byte[] terminator = getTerminator(); if (terminator.length > 0) { System.arraycopy(terminator, 0, bytes, i, terminator.length); } } return bytes; } } public String toString() { try { return bytesToString(value, characterSetForTextEncoding(textEncoding)); } catch (CharacterCodingException e) { return null; } } public String getCharacterSet() { return characterSetForTextEncoding(textEncoding); } public boolean equals(Object obj) { if (! (obj instanceof EncodedText)) return false; if (super.equals(obj)) return true; EncodedText other = (EncodedText) obj; if (textEncoding != other.textEncoding) return false; if (! Arrays.equals(value, other.value)) return false; return true; } public static String bytesToString(byte[] bytes, String characterSet) throws CharacterCodingException { CharBuffer cbuf = bytesToCharBuffer(bytes, characterSet); String s = cbuf.toString(); int length = s.indexOf(0); if (length == -1) return s; return s.substring(0, length); } protected static CharBuffer bytesToCharBuffer(byte[] bytes, String characterSet) throws CharacterCodingException { Charset charset = Charset.forName(characterSet); CharsetDecoder decoder = charset.newDecoder(); return decoder.decode(ByteBuffer.wrap(bytes)); } public static byte[] stringToBytes(String s, String characterSet) { try { return charBufferToBytes(CharBuffer.wrap(s), characterSet); } catch (CharacterCodingException e) { return null; } } protected static byte[] charBufferToBytes(CharBuffer charBuffer, String characterSet) throws CharacterCodingException { Charset charset = Charset.forName(characterSet); CharsetEncoder encoder = charset.newEncoder(); ByteBuffer byteBuffer = encoder.encode(charBuffer); return BufferTools.copyBuffer(byteBuffer.array(), 0, byteBuffer.limit()); } }