package com.limegroup.gnutella.metadata;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.DataInputStream;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import com.limegroup.gnutella.util.CountingInputStream;
import com.limegroup.gnutella.util.IOUtils;
import com.limegroup.gnutella.ByteOrder;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
/**
* A parser for reading ASF files.
* Everything we understand is stored.
*
* This is initially based off the work of Reed Esau, in his excellent ptarmigan package,
* from http://ptarmigan.sourceforge.net/ . This was also based off of the work
* in the XNap project, from
* http://xnap.sourceforge.net/xref/org/xnap/plugin/viewer/videoinfo/VideoFile.html ,
* which in turn was based off the work from the avifile project, at
* http://avifile.sourceforge.net/ .
*/
class ASFParser {
private static final Log LOG = LogFactory.getLog(ASFParser.class);
// data types we know about in the extended content description.
// THESE ARE WRONG (but close enough for now)
private static final int TYPE_STRING = 0;
private static final int TYPE_BINARY = 1;
private static final int TYPE_BOOLEAN = 2;
private static final int TYPE_INT = 3;
private static final int TYPE_LONG = 4;
private String _album, _artist, _title, _year, _copyright,
_rating, _genre, _comment, _drmType;
private short _track = -1;
private int _bitrate = -1, _length = -1, _width = -1, _height = -1;
private boolean _hasAudio, _hasVideo;
private WeedInfo _weed;
private WRMXML _wrmdata;
String getAlbum() { return _album; }
String getArtist() { return _artist; }
String getTitle() { return _title; }
String getYear() { return _year; }
String getCopyright() { return _copyright; }
String getRating() { return _rating; }
String getGenre() { return _genre; }
String getComment() { return _comment; }
short getTrack() { return _track; }
int getBitrate() { return _bitrate; }
int getLength() { return _length; }
int getWidth() { return _width; }
int getHeight() { return _height; }
WeedInfo getWeedInfo() { return _weed; }
WRMXML getWRMXML() { return _wrmdata; }
boolean hasAudio() { return _hasAudio; }
boolean hasVideo() { return _hasVideo; }
String getLicenseInfo() {
if(_weed != null)
return _weed.getLicenseInfo();
else if(_wrmdata != null && _drmType != null)
return WRMXML.PROTECTED + _drmType;
else
return null;
}
/**
* Constructs a new ASFParser based off the given file, parsing all the known properties.
*/
ASFParser(File f) throws IOException {
parseFile(f);
}
/**
* Parses the given file for metadata we understand.
*/
protected void parseFile(File f) throws IOException {
if(LOG.isDebugEnabled())
LOG.debug("Parsing file: " + f);
InputStream is = null;
try {
is = new BufferedInputStream(new FileInputStream(f));
parse(is);
} catch(IOException iox) {
LOG.warn("IOX while parsing", iox);
throw iox;
} finally {
IOUtils.close(is);
}
}
/**
* Parses a ASF input stream's metadata.
* This first checks that the marker (16 bytes) is correct, reads the data offset & object count,
* and then iterates through the objects, reading them.
* Each object is stored in the format:
* ObjectID (16 bytes)
* Object Size (4 bytes)
* Object (Object Size bytes)
*/
private void parse(InputStream is) throws IOException {
CountingInputStream counter = new CountingInputStream(is);
DataInputStream ds = new DataInputStream(counter);
byte[] marker = new byte[IDs.HEADER_ID.length];
ds.readFully(marker);
if(!Arrays.equals(marker, IDs.HEADER_ID))
throw new IOException("not an ASF file");
long dataOffset = ByteOrder.leb2long(ds);
int objectCount = ByteOrder.leb2int(ds);
IOUtils.ensureSkip(ds, 2);
if(LOG.isDebugEnabled())
LOG.debug("Data Offset: " + dataOffset + ", objectCount: " + objectCount);
if (dataOffset < 0)
throw new IOException("ASF file is corrupt. Data offset negative:"
+dataOffset);
if (objectCount < 0)
throw new IOException("ASF file is corrupt. Object count unreasonable:"
+ ByteOrder.uint2long(objectCount));
if(objectCount > 100)
throw new IOException("object count very high: " + objectCount);
byte[] object = new byte[16];
for(int i = 0; i < objectCount; i++) {
if(LOG.isDebugEnabled())
LOG.debug("Parsing object[" + i + "]");
ds.readFully(object);
long size = ByteOrder.leb2long(ds) - 24;
if (size < 0)
throw new IOException("ASF file is corrupt. Object size < 0 :"+size);
counter.clearAmountRead();
readObject(ds, object, size);
int read = counter.getAmountRead();
if(read > size)
throw new IOException("read (" + read + ") more than size (" + size + ")");
else if(read != size) {
if(LOG.isDebugEnabled())
LOG.debug("Skipping to next object. Read: " + read + ", size: " + size);
IOUtils.ensureSkip(ds, size - read);
}
}
}
/**
* Reads a single object from a ASF metadata stream.
* The objectID has already been read. Each object is stored differently.
*/
private void readObject(DataInputStream ds, byte[] id, long size) throws IOException {
if(Arrays.equals(id, IDs.FILE_PROPERTIES_ID))
parseFileProperties(ds);
else if(Arrays.equals(id, IDs.STREAM_PROPERTIES_ID))
parseStreamProperties(ds);
else if(Arrays.equals(id, IDs.EXTENDED_STREAM_PROPERTIES_ID))
parseExtendedStreamProperties(ds);
else if(Arrays.equals(id, IDs.CONTENT_DESCRIPTION_ID))
parseContentDescription(ds);
else if(Arrays.equals(id, IDs.EXTENDED_CONTENT_DESCRIPTION_ID))
parseExtendedContentDescription(ds);
else if(Arrays.equals(id, IDs.CONTENT_ENCRYPTION_ID))
parseContentEncryption(ds);
else if(Arrays.equals(id, IDs.EXTENDED_CONTENT_ENCRYPTION_ID))
parseExtendedContentEncryption(ds);
else {
LOG.debug("Unknown Object, ignoring.");
// for debugging.
//byte[] temp = new byte[size];
//ds.readFully(temp);
//LOG.debug("id: " + string(id) + ", data: " + string(temp));
}
}
/** Parses known information out of the file properties object. */
private void parseFileProperties(DataInputStream ds) throws IOException {
LOG.debug("Parsing file properties");
IOUtils.ensureSkip(ds, 48);
int duration = (int)(ByteOrder.leb2long(ds) / 10000000);
if (duration < 0)
throw new IOException("ASF file corrupt. Duration < 0:"+duration);
_length = duration;
IOUtils.ensureSkip(ds, 20);
int maxBR = ByteOrder.leb2int(ds);
if (maxBR < 0)
throw new IOException("ASF file corrupt. Max bitrate > 2 Gb/s:"+
ByteOrder.uint2long(maxBR));
if(LOG.isDebugEnabled())
LOG.debug("maxBitrate: " + maxBR);
_bitrate = maxBR / 1000;
}
/** Parses stream properties to see if we have audio or video data. */
private void parseStreamProperties(DataInputStream ds) throws IOException {
LOG.debug("Parsing stream properties");
byte[] streamID = new byte[16];
ds.readFully(streamID);
if(Arrays.equals(streamID, IDs.AUDIO_STREAM_ID)) {
_hasAudio = true;
} else if(Arrays.equals(streamID, IDs.VIDEO_STREAM_ID)) {
_hasVideo = true;
IOUtils.ensureSkip(ds, 38);
_width = ByteOrder.leb2int(ds);
if (_width < 0)
throw new IOException("ASF file corrupt. Video width excessive:"+
ByteOrder.uint2long(_width));
_height = ByteOrder.leb2int(ds);
if (_height < 0)
throw new IOException("ASF file corrupt. Video height excessive:"+
ByteOrder.uint2long(_height));
}
// we aren't reading everything, but we'll skip over just fine.
}
/** Parses known information out of the extended stream properties object. */
private void parseExtendedStreamProperties(DataInputStream ds) throws IOException {
LOG.debug("Parsing extended stream properties");
IOUtils.ensureSkip(ds, 56);
int channels = ByteOrder.ushort2int(ByteOrder.leb2short(ds));
int sampleRate = ByteOrder.leb2int(ds);
if (sampleRate < 0)
throw new IOException("ASF file corrupt. Sample rate excessive:"+
ByteOrder.uint2long(sampleRate));
int byteRate = ByteOrder.leb2int(ds);
if (byteRate < 0)
throw new IOException("ASF file corrupt. Byte rate excessive:"+
ByteOrder.uint2long(byteRate));
if(_bitrate == -1)
_bitrate = byteRate * 8 / 1000;
if(LOG.isDebugEnabled())
LOG.debug("channels: " + channels + ", sampleRate: " + sampleRate + ", byteRate: " + byteRate + ", bitRate: " + _bitrate);
}
/**
* Parses the content encryption object, to determine if the file is protected.
* We parse through it all, even though we don't use all of it, to ensure
* that the object is well-formed.
*/
private void parseContentEncryption(DataInputStream ds) throws IOException {
LOG.debug("Parsing content encryption");
long skipSize = ByteOrder.uint2long(ByteOrder.leb2int(ds)); // data
IOUtils.ensureSkip(ds, skipSize);
int typeSize = ByteOrder.leb2int(ds); // type
if (typeSize < 0)
throw new IOException("ASF file is corrupt. Type size < 0: "+typeSize);
byte[] b = new byte[typeSize];
ds.readFully(b);
_drmType = new String(b).trim();
skipSize = ByteOrder.uint2long(ByteOrder.leb2int(ds)); // data
IOUtils.ensureSkip(ds, skipSize);
skipSize = ByteOrder.uint2long(ByteOrder.leb2int(ds)); // url
IOUtils.ensureSkip(ds, skipSize);
}
/**
* Parses the extended content encryption object, looking for encryption's
* we know about.
* Currently, this is Weed.
*/
private void parseExtendedContentEncryption(DataInputStream ds) throws IOException {
LOG.debug("Parsing extended content encryption");
int size = ByteOrder.leb2int(ds);
if (size < 0)
throw new IOException("ASF file reports excessive length of encryption data:"
+ByteOrder.uint2long(size));
byte[] b = new byte[size];
ds.readFully(b);
String xml = new String(b, "UTF-16").trim();
WRMXML wrmdata = new WRMXML(xml);
if(!wrmdata.isValid()) {
LOG.debug("WRM Data is invalid.");
return;
}
_wrmdata = wrmdata;
WeedInfo weed = new WeedInfo(wrmdata);
if(weed.isValid()) {
LOG.debug("Parsed weed data.");
_weed = weed;
_wrmdata = weed;
if(_weed.getAuthor() != null)
_artist = _weed.getAuthor();
if(_weed.getTitle() != null)
_title = _weed.getTitle();
if(_weed.getDescription() != null)
_comment = _weed.getDescription();
if(_weed.getCollection() != null)
_album = _weed.getCollection();
if(_weed.getCopyright() != null)
_copyright = _weed.getCopyright();
return;
}
}
/**
* Parses known information out of the Content Description object.
* The data is stored as:
* 10 bytes of sizes (2 bytes for each size).
* The data corresponding to each size. The data is stored in order of:
* Title, Author, Copyright, Description, Rating.
*/
private void parseContentDescription(DataInputStream ds) throws IOException {
LOG.debug("Parsing Content Description");
int[] sizes = { -1, -1, -1, -1, -1 };
for(int i = 0; i < sizes.length; i++)
sizes[i] = ByteOrder.ushort2int(ByteOrder.leb2short(ds));
byte[][] info = new byte[5][];
for(int i = 0; i < sizes.length; i++)
info[i] = new byte[sizes[i]];
for(int i = 0; i < info.length; i++)
ds.readFully(info[i]);
_title = string(info[0]);
_artist = string(info[1]);
_copyright = string(info[2]);
_comment = string(info[3]);
_rating = string(info[4]);
if(LOG.isDebugEnabled())
LOG.debug("Standard Tag Values. Title: " + _title + ", Author: " + _artist + ", Copyright: " + _copyright
+ ", Description: " + _comment + ", Rating: " + _rating);
}
/**
* Reads the extended Content Description object.
* The extended tag has an arbitrary number of fields.
* The number of fields is stored first, as:
* Field Count (2 bytes)
*
* Each field is stored as:
* Field Size (2 bytes)
* Field (Field Size bytes)
* Data Type (2 bytes)
* Data Size (2 bytes)
* Data (Data Size bytes)
*/
private void parseExtendedContentDescription(DataInputStream ds) throws IOException {
LOG.debug("Parsing extended content description");
int fieldCount = ByteOrder.ushort2int(ByteOrder.leb2short(ds));
if(LOG.isDebugEnabled())
LOG.debug("Extended fieldCount: " + fieldCount);
for(int i = 0; i < fieldCount; i++) {
int fieldSize = ByteOrder.ushort2int(ByteOrder.leb2short(ds));
byte[] field = new byte[fieldSize];
ds.readFully(field);
String fieldName = string(field);
int dataType = ByteOrder.ushort2int(ByteOrder.leb2short(ds));
int dataSize = ByteOrder.ushort2int(ByteOrder.leb2short(ds));
switch(dataType) {
case TYPE_STRING:
parseExtendedString(fieldName, dataSize, ds);
break;
case TYPE_BINARY:
parseExtendedBinary(fieldName, dataSize, ds);
break;
case TYPE_BOOLEAN:
parseExtendedBoolean(fieldName, dataSize, ds);
break;
case TYPE_INT:
parseExtendedInt(fieldName, dataSize, ds);
break;
case TYPE_LONG:
parseExtendedInt(fieldName, dataSize, ds);
break;
default:
if(LOG.isDebugEnabled())
LOG.debug("Unknown dataType: " + dataType + " for field: " + fieldName);
IOUtils.ensureSkip(ds, dataSize);
}
}
}
/**
* Parses a value from an extended tag, assuming the value is of the 'string' dataType.
*/
private void parseExtendedString(String field, int size, DataInputStream ds) throws IOException {
byte[] data = new byte[Math.min(250, size)];
ds.readFully(data);
int leftover = Math.max(0, size - 250);
IOUtils.ensureSkip(ds, leftover);
String info = string(data);
if(LOG.isDebugEnabled())
LOG.debug("Parsing extended String. field: " + field + ", Value: " + info);
if(Extended.WM_TITLE.equals(field)) {
if(_title == null)
_title = info;
} else if(Extended.WM_AUTHOR.equals(field)) {
if(_artist == null)
_artist = info;
} else if(Extended.WM_ALBUMTITLE.equals(field)) {
if(_album == null)
_album = info;
} else if(Extended.WM_TRACK_NUMBER.equals(field)) {
if(_track == -1)
_track = toShort(info);
} else if(Extended.WM_YEAR.equals(field)) {
if(_year == null)
_year = info;
} else if(Extended.WM_GENRE.equals(field)) {
if(_genre == null)
_genre = info;
} else if(Extended.WM_DESCRIPTION.equals(field)) {
if(_comment == null)
_comment = info;
}
}
/**
* Parses a value from an extended tag, assuming the value is of the 'boolean' dataType.
*/
private void parseExtendedBoolean(String field, int size, DataInputStream ds) throws IOException {
if(LOG.isDebugEnabled())
LOG.debug("Ignoring boolean field: " + field + ", size: " + size);
IOUtils.ensureSkip(ds, size);
}
/**
* Parses a value from an extended tag, assuming the value is of the 'int' dataType.
*/
private void parseExtendedInt(String field, int size, DataInputStream ds) throws IOException {
if(size != 4) {
if(LOG.isDebugEnabled())
LOG.debug("Int field size != 4, ignoring. Field: " + field + ", size: " + size);
IOUtils.ensureSkip(ds, size);
return;
}
int value = ByteOrder.leb2int(ds);
if(LOG.isDebugEnabled())
LOG.debug("Parsing extended int, field: " + field + ", size: " + size + ", value: " + value);
if(Extended.WM_TRACK_NUMBER.equals(field)) {
if(_track == -1) {
short shortValue = (short)value;
if (shortValue < 0)
throw new IOException("ASF file reports negative track number "+shortValue);
_track = shortValue;
}
}
}
/**
* Parses a value from an extended tag, assuming the value is of the 'binary' dataType.
*/
private void parseExtendedBinary(String field, int size, DataInputStream ds) throws IOException {
if(LOG.isDebugEnabled())
LOG.debug("Ignoring binary field: " + field + ", size: " + size);
IOUtils.ensureSkip(ds, size);
}
/**
* Parses a value from an extended tag, assuming the value is of the 'long' dataType.
*/
private void parseExtendedLong(String field, int size, DataInputStream ds) throws IOException {
if(size != 8) {
if(LOG.isDebugEnabled())
LOG.debug("Long field size != 8, ignoring. Field: " + field + ", size: " + size);
IOUtils.ensureSkip(ds, size);
return;
}
long value = ByteOrder.leb2long(ds);
if(LOG.isDebugEnabled())
LOG.debug("Ignoring long field: " + field + ", size: " + size + ", value: " + value);
}
/** Converts a String to a short, if it can. */
private short toShort(String x) {
try {
return Short.parseShort(x);
} catch(NumberFormatException nfe) {
return -1;
}
}
/**
* Returns a String uses ASF's encoding (WCHAR: UTF-16 little endian).
* If we don't support that encoding for whatever, hack out the zeros.
*/
private String string(byte[] x) throws IOException {
if(x == null)
return null;
try {
return new String(x, "UTF-16LE").trim();
} catch(UnsupportedEncodingException uee) {
// hack.
int pos = 0;
for(int i = 0; i < x.length; i++) {
if(x[i] != 0)
x[pos++] = x[i];
}
return new String(x, 0, pos, "UTF-8");
}
}
private static class IDs {
private static final byte HEADER_ID[] =
{ (byte)0x30, (byte)0x26, (byte)0xB2, (byte)0x75, (byte)0x8E, (byte)0x66, (byte)0xCF, (byte)0x11,
(byte)0xA6, (byte)0xD9, (byte)0x00, (byte)0xAA, (byte)0x00, (byte)0x62, (byte)0xCE, (byte)0x6C };
private static final byte FILE_PROPERTIES_ID[] =
{ (byte)0xA1, (byte)0xDC, (byte)0xAB, (byte)0x8C, (byte)0x47, (byte)0xA9, (byte)0xCF, (byte)0x11,
(byte)0x8E, (byte)0xE4, (byte)0x00, (byte)0xC0, (byte)0x0C, (byte)0x20, (byte)0x53, (byte)0x65 };
private static final byte STREAM_PROPERTIES_ID[] =
{ (byte)0x91, (byte)0x07, (byte)0xDC, (byte)0xB7, (byte)0xB7, (byte)0xA9, (byte)0xCF, (byte)0x11,
(byte)0x8E, (byte)0xE6, (byte)0x00, (byte)0xC0, (byte)0x0C, (byte)0x20, (byte)0x53, (byte)0x65 };
private static final byte EXTENDED_STREAM_PROPERTIES_ID[] =
{ (byte)0xCB, (byte)0xA5, (byte)0xE6, (byte)0x14, (byte)0x72, (byte)0xC6, (byte)0x32, (byte)0x43,
(byte)0x83, (byte)0x99, (byte)0xA9, (byte)0x69, (byte)0x52, (byte)0x06, (byte)0x5B, (byte)0x5A };
private static final byte CONTENT_DESCRIPTION_ID[] =
{ (byte)0x33, (byte)0x26, (byte)0xB2, (byte)0x75, (byte)0x8E, (byte)0x66, (byte)0xCF, (byte)0x11,
(byte)0xA6, (byte)0xD9, (byte)0x00, (byte)0xAA, (byte)0x00, (byte)0x62, (byte)0xCE, (byte)0x6C };
private static final byte EXTENDED_CONTENT_DESCRIPTION_ID[] =
{ (byte)0x40, (byte)0xA4, (byte)0xD0, (byte)0xD2, (byte)0x07, (byte)0xE3, (byte)0xD2, (byte)0x11,
(byte)0x97, (byte)0xF0, (byte)0x00, (byte)0xA0, (byte)0xC9, (byte)0x5E, (byte)0xA8, (byte)0x50 };
private static final byte CONTENT_ENCRYPTION_ID[] =
{ (byte)0xFB, (byte)0xB3, (byte)0x11, (byte)0x22, (byte)0x23, (byte)0xBD, (byte)0xD2, (byte)0x11,
(byte)0xB4, (byte)0xB7, (byte)0x00, (byte)0xA0, (byte)0xC9, (byte)0x55, (byte)0xFC, (byte)0x6E };
private static final byte EXTENDED_CONTENT_ENCRYPTION_ID[] =
{ (byte)0x14, (byte)0xE6, (byte)0x8A, (byte)0x29, (byte)0x22, (byte)0x26, (byte)0x17, (byte)0x4C,
(byte)0xB9, (byte)0x35, (byte)0xDA, (byte)0xE0, (byte)0x7E, (byte)0xE9, (byte)0x28, (byte)0x9C };
private static final byte CODEC_LIST_ID[] =
{ (byte)0x40, (byte)0x52, (byte)0xD1, (byte)0x86, (byte)0x1D, (byte)0x31, (byte)0xD0, (byte)0x11,
(byte)0xA3, (byte)0xA4, (byte)0x00, (byte)0xA0, (byte)0xC9, (byte)0x03, (byte)0x48, (byte)0xF6 };
private static final byte AUDIO_STREAM_ID[] =
{ (byte)0x40, (byte)0x9E, (byte)0x69, (byte)0xF8, (byte)0x4D, (byte)0x5B, (byte)0xCF, (byte)0x11,
(byte)0xA8, (byte)0xFD, (byte)0x00, (byte)0x80, (byte)0x5F, (byte)0x5C, (byte)0x44, (byte)0x2B };
private static final byte VIDEO_STREAM_ID[] =
{ (byte)0xC0, (byte)0xEF, (byte)0x19, (byte)0xBC, (byte)0x4D, (byte)0x5B, (byte)0xCF, (byte)0x11,
(byte)0xA8, (byte)0xFD, (byte)0x00, (byte)0x80, (byte)0x5F, (byte)0x5C, (byte)0x44, (byte)0x2B };
}
private static class Extended {
/** the title of the file */
private static final String WM_TITLE = "WM/Title";
/** the author of the fiel */
private static final String WM_AUTHOR = "WM/Author";
/** the title of the album the file is on */
private static final String WM_ALBUMTITLE = "WM/AlbumTitle";
/** the zero-based track of the song */
private static final String WM_TRACK = "WM/Track";
/** the one-based track of the song */
private static final String WM_TRACK_NUMBER = "WM/TrackNumber";
/** the year the song was made */
private static final String WM_YEAR = "WM/Year";
/** the genre of the song */
private static final String WM_GENRE = "WM/Genre";
/** the description of the song */
private static final String WM_DESCRIPTION = "WM/Description";
/** the lyrics of the song */
private static final String WM_LYRICS = "WM/Lyrics";
/** whether or not this is encoded in VBR */
private static final String VBR = "IsVBR";
/** the unique file identifier of this song */
private static final String WM_UNIQUE_FILE_IDENTIFIER = "WM/UniqueFileIdentifier";
/** the artist of the album as a whole */
private static final String WM_ALBUMARTIST = "WM/AlbumArtist";
/** the encapsulated ID3 info */
private static final String ID3 = "ID3";
/** the provider of the song */
private static final String WM_PROVIDER = "WM/Provider";
/** the rating the provider gave this song */
private static final String WM_PROVIDER_RATING = "WM/ProviderRating";
/** the publisher */
private static final String WM_PUBLISHER = "WM/Publisher";
/** the composer */
private static final String WM_COMPOSER = "WM/Composer";
/** the time the song was encoded */
private static final String WM_ENCODING_TIME = "WM/EncodingTime";
}
}