package org.jaudiotagger.tag.datatype; import org.jaudiotagger.tag.InvalidDataTypeException; import org.jaudiotagger.tag.TagOptionSingleton; import org.jaudiotagger.tag.id3.AbstractTagFrameBody; import org.jaudiotagger.tag.id3.valuepair.TextEncoding; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.*; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * Represents a String which is not delimited by null character. * * This type of String will usually only be used when it is the last field within a frame, when reading the remainder of * the byte array will be read, when writing the frame will be accommodate the required size for the String. The String * will be encoded based upon the text encoding of the frame that it belongs to. * * All TextInformation frames support multiple strings, stored as a null separated list, where null is represented by * the termination code for the character encoding. This functionality is only officially support in ID3v24. * * Most applications will ignore any but the first value, but some such as Foobar 2000 will decode them properly * * iTunes write null terminators characters after the String even though it only writes a single value. * * */ public class TextEncodedStringSizeTerminated extends AbstractString { /** * Creates a new empty TextEncodedStringSizeTerminated datatype. * * @param identifier identifies the frame type * @param frameBody */ public TextEncodedStringSizeTerminated(String identifier, AbstractTagFrameBody frameBody) { super(identifier, frameBody); } /** * Copy constructor * * @param object */ public TextEncodedStringSizeTerminated(TextEncodedStringSizeTerminated object) { super(object); } public boolean equals(Object obj) { if(this==obj) { return true; } return obj instanceof TextEncodedStringSizeTerminated && super.equals(obj); } /** * Read a 'n' bytes from buffer into a String where n is the framesize - offset * so therefore cannot use this if there are other objects after it because it has no * delimiter. * * Must take into account the text encoding defined in the Encoding Object * ID3 Text Frames often allow multiple strings seperated by the null char * appropriate for the encoding. * * @param arr this is the buffer for the frame * @param offset this is where to start reading in the buffer for this field * @throws NullPointerException * @throws IndexOutOfBoundsException */ public void readByteArray(byte[] arr, int offset) throws InvalidDataTypeException { logger.finest("Reading from array from offset:" + offset); //Get the Specified Decoder String charSetName = getTextEncodingCharSet(); CharsetDecoder decoder = Charset.forName(charSetName).newDecoder(); decoder.reset(); //Decode sliced inBuffer ByteBuffer inBuffer; //#302 [dallen] truncating array manually since the decoder.decode() does not honor the offset in the in buffer byte[] truncArr = new byte[arr.length - offset]; System.arraycopy(arr, offset, truncArr, 0, truncArr.length); inBuffer = ByteBuffer.wrap(truncArr); CharBuffer outBuffer = CharBuffer.allocate(arr.length - offset); CoderResult coderResult = decoder.decode(inBuffer, outBuffer, true); if (coderResult.isError()) { logger.warning("Decoding error:" + coderResult.toString()); } decoder.flush(outBuffer); outBuffer.flip(); //If using UTF16 with BOM we then search through the text removing any BOMs that could exist //for multiple values, BOM could be Big Endian or Little Endian if (charSetName.equals(TextEncoding.CHARSET_UTF_16)) { value = outBuffer.toString().replace("\ufeff","").replace("\ufffe",""); } else { value = outBuffer.toString(); } //SetSize, important this is correct for finding the next datatype setSize(arr.length - offset); logger.config("Read SizeTerminatedString:" + value + " size:" + size); } /** * Write String using specified encoding * * When this is called multiple times, all but the last value has a trailing null * * @param encoder * @param next * @param i * @param noOfValues * @return * @throws CharacterCodingException */ private ByteBuffer writeString( CharsetEncoder encoder, String next, int i, int noOfValues) throws CharacterCodingException { ByteBuffer bb; if(( i + 1) == noOfValues ) { bb = encoder.encode(CharBuffer.wrap(next)); } else { bb = encoder.encode(CharBuffer.wrap(next + '\0')); } bb.rewind(); return bb; } /** * Write String in UTF-LEBOM format * * When this is called multiple times, all but the last value has a trailing null * * Remember we are using this charset because the charset that writes BOM does it the wrong way for us * so we use this none and then manually add the BOM ourselves. * * @param next * @param i * @param noOfValues * @return * @throws CharacterCodingException */ private ByteBuffer writeStringUTF16LEBOM( String next, int i, int noOfValues) throws CharacterCodingException { CharsetEncoder encoder = Charset.forName(TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT).newEncoder(); encoder.onMalformedInput(CodingErrorAction.IGNORE); encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); ByteBuffer bb = null; //Note remember LE BOM is ff fe but this is handled by encoder Unicode char is fe ff if(( i + 1)==noOfValues) { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next )); } else { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next + '\0')); } bb.rewind(); return bb; } /** * Write String in UTF-BEBOM format * * When this is called multiple times, all but the last value has a trailing null * * @param next * @param i * @param noOfValues * @return * @throws CharacterCodingException */ private ByteBuffer writeStringUTF16BEBOM( String next, int i, int noOfValues) throws CharacterCodingException { CharsetEncoder encoder = Charset.forName(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT).newEncoder(); encoder.onMalformedInput(CodingErrorAction.IGNORE); encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); ByteBuffer bb = null; //Add BOM if(( i + 1)==noOfValues) { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next )); } else { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next + '\0')); } bb.rewind(); return bb; } /** * Removing trailing null from end of String, this should be there but some applications continue to write * this unnecessary null char. */ private void stripTrailingNull() { if (TagOptionSingleton.getInstance().isRemoveTrailingTerminatorOnWrite()) { String stringValue = (String) value; if (stringValue.length() > 0) { if (stringValue.charAt(stringValue.length() - 1) == '\0') { stringValue = (stringValue).substring(0, stringValue.length() - 1); value = stringValue; } } } } /** * Because nulls are stripped we need to check if not removing trailing nulls whether the original * value ended with a null and if so add it back in. * @param values * @param stringValue */ private void checkTrailingNull( List<String> values, String stringValue) { if(!TagOptionSingleton.getInstance().isRemoveTrailingTerminatorOnWrite()) { if (stringValue.length() > 0 && stringValue.charAt(stringValue.length() - 1) == '\0') { String lastVal = values.get(values.size() - 1); String newLastVal = lastVal + '\0'; values.set(values.size() - 1,newLastVal); } } } /** * Write String into byte array * * It will remove a trailing null terminator if exists if the option * RemoveTrailingTerminatorOnWrite has been set. * * @return the data as a byte array in format to write to file */ public byte[] writeByteArray() { byte[] data; //Try and write to buffer using the CharSet defined by getTextEncodingCharSet() String charSetName = getTextEncodingCharSet(); try { stripTrailingNull(); //Special Handling because there is no UTF16 BOM LE charset String stringValue = (String)value; String actualCharSet = null; if (charSetName.equals(TextEncoding.CHARSET_UTF_16)) { if(TagOptionSingleton.getInstance().isEncodeUTF16BomAsLittleEndian()) { actualCharSet = TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT; } else { actualCharSet = TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT; } } //Ensure large enough for any encoding ByteBuffer outputBuffer = ByteBuffer.allocate((stringValue.length() + 3)* 3); //Ensure each string (if multiple values) is written with BOM by writing separately List<String> values = splitByNullSeperator(stringValue); checkTrailingNull(values, stringValue); //For each value for(int i=0;i<values.size();i++) { String next = values.get(i); if(actualCharSet!=null) { if (actualCharSet.equals(TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT)) { outputBuffer.put(writeStringUTF16LEBOM( next, i, values.size())); } else if (actualCharSet.equals(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT)) { outputBuffer.put(writeStringUTF16BEBOM( next, i, values.size())); } } else { CharsetEncoder charsetEncoder = Charset.forName(charSetName).newEncoder(); charsetEncoder.onMalformedInput(CodingErrorAction.IGNORE); charsetEncoder.onUnmappableCharacter(CodingErrorAction.IGNORE); outputBuffer.put(writeString( charsetEncoder, next, i, values.size())); } } outputBuffer.flip(); data = new byte[outputBuffer.limit()]; outputBuffer.rewind(); outputBuffer.get(data, 0, outputBuffer.limit()); setSize(data.length); } //https://bitbucket.org/ijabz/jaudiotagger/issue/1/encoding-metadata-to-utf-16-can-fail-if catch (CharacterCodingException ce) { logger.severe(ce.getMessage()+":"+charSetName+":"+value); throw new RuntimeException(ce); } return data; } /** * Get the text encoding being used. * * The text encoding is defined by the frame body that the text field belongs to. * * @return the text encoding charset */ protected String getTextEncodingCharSet() { byte textEncoding = this.getBody().getTextEncoding(); String charSetName = TextEncoding.getInstanceOf().getValueForId(textEncoding); logger.finest("text encoding:" + textEncoding + " charset:" + charSetName); return charSetName; } /** * Split the values separated by null character * * @param value the raw value * @return list of values, guaranteed to be at least one value */ public static List<String> splitByNullSeperator(String value) { String[] valuesarray = value.split("\\u0000"); List<String> values = Arrays.asList(valuesarray); //Read only list so if empty have to create new list if (values.size() == 0) { values = new ArrayList<String>(1); values.add(""); } return values; } /** * Add an additional String to the current String value * * @param value */ public void addValue(String value) { setValue(this.value + "\u0000" + value); } /** * How many values are held, each value is separated by a null terminator * * @return number of values held, usually this will be one. */ public int getNumberOfValues() { return splitByNullSeperator(((String) value)).size(); } /** * Get the nth value * * @param index * @return the nth value * @throws IndexOutOfBoundsException if value does not exist */ public String getValueAtIndex(int index) { //Split String into separate components List values = splitByNullSeperator((String) value); return (String) values.get(index); } /** * * @return list of all values */ public List<String> getValues() { return splitByNullSeperator((String) value); } /** * Get value(s) whilst removing any trailing nulls * * @return */ public String getValueWithoutTrailingNull() { List<String> values = splitByNullSeperator((String) value); StringBuffer sb = new StringBuffer(); for(int i=0;i<values.size();i++) { if(i!=0) { sb.append("\u0000"); } sb.append(values.get(i)); } return sb.toString(); } }