package com.limegroup.gnutella.metadata;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import com.limegroup.gnutella.ByteOrder;
import com.limegroup.gnutella.util.CountingInputStream;
import com.limegroup.gnutella.util.IOUtils;
/**
* Limited metadata parsing of m4a files. This is based on code published
* by Chris Adamson and released under GPL. Information about the format was
* originally found on <url>http://www.oreillynet.com/pub/wlg/3130</url>.
*
* A great THANK YOU to Roger Kapsi for his help!
*
*
* The m4a files have a tree structure composed of atoms.
* Atoms are similar to xml tags.
*
* Here is the structure of a typical m4a file:
* [ftyp]
* [moov]
* [mvhd]
* (length info - 5th 32bit int divided by 4th 32 bit int)
* [trak]
* [udta]
* [meta]
* [hdlr]
* [ilst]
* (metadata atoms)
* ....
* (other atoms we don't care about)
*
* Each metadata atom contains its data in a [data] atom. For example,
* the structure of the genre atom is:
* [gnre]
* [data]
* (the genre of the file)
*
*
* Furthermore, each atom has an 8 or 16 byte header:
* 32 bit unsigned integer size (includes header). If it is 1, there is an extended size.
* 32 bit id
* (optional) 64 bit extended size
*
* Although the atom names can sometimes be represented as strings, they sometimes
* contain non-ASCII characters, so it is safer to represent all atom names as integers
* (that is what the spec says, too).
*
*/
public class M4AMetaData extends AudioMetaData {

    /**
     * some atoms we don't care about
     */
    private static final int FTYP_ATOM = 0x66747970;
    private static final int MOOV_ATOM = 0x6d6f6f76;
    private static final int MVHD_ATOM = 0x6d766864;
    private static final int TRAK_ATOM = 0x7472616b;
    private static final int TKHD_ATOM = 0x746b6864;
    private static final int MDIA_ATOM = 0x6d646961;
    private static final int ESDS_ATOM = 0x65736473;
    private static final int ALAC_ATOM = 0x616c6163;
    private static final int MDHD_ATOM = 0x6d646864;
    private static final int MINF_ATOM = 0x6d696e66;
    private static final int DINF_ATOM = 0x64696e66;
    private static final int SMHD_ATOM = 0x736d6864;
    private static final int STBL_ATOM = 0x7374626c;
    private static final int STSD_ATOM = 0x73747364;
    private static final int MP4A_ATOM = 0x6d703461;
    private static final int DRMS_ATOM = 0x64726d73;
    private static final int UDTA_ATOM = 0x75647461;
    private static final int META_ATOM = 0x6d657461;
    private static final int HDLR_ATOM = 0x68646c72;
    private static final int STTS_ATOM = 0x73747473;
    private static final int STSC_ATOM = 0x73747363;
    private static final int STSZ_ATOM = 0x7374737a;
    private static final int STCO_ATOM = 0x7374636f;

    /**
     * this atom contains the metadata.
     */
    private static final int ILST_ATOM = 0x696c7374;

    /**
     * some metadata header atoms
     */
    private final static int NAME_ATOM = 0xa96e616d; // 0xa9 + "nam"
    private final static int ALBUM_ATOM = 0xa9616c62; // 0xa9 + "alb"
    private final static int ARTIST_ATOM = 0xa9415254; // 0xa9 + "ART"
    private final static int DATE_ATOM = 0xa9646179; // 0xa9 + "day"
    private final static int GENRE_ATOM = 0x676e7265; // "gnre"
    private final static int GENRE_ATOM_STANDARD = 0xA967656E; // 0xa9 + "gen"
    private final static int TRACK_ATOM = 0x74726b6e; // "trkn"
    private final static int TRACK_ATOM_STANDARD = 0xA974726b; // 0xa9 + "trk"
    private final static int COMMENT_ATOM = 0xA9636D74; // 0xa9 + "cmt"
    private final static int DISK_ATOM = 0x6469736b; // "disk"

    /**
     * the data atom within each metadata atom
     */
    private final static int DATA_ATOM = 0x64617461; // "data"

    /** length of a regular atom header: 32 bit size + 32 bit type */
    private static final int ATOM_HEADER_LENGTH = 8;

    /** length of an atom header that also carries the 64 bit extended size */
    private static final int EXTENDED_ATOM_HEADER_LENGTH = 16;

    /**
     * number of bytes at the start of every array returned by
     * {@link #readDataAtom(DataInputStream)} that precede the actual value.
     * Byte 3 of this prefix is the flag that is tested for custom genres.
     */
    private static final int DATA_PREFIX_LENGTH = 8;

    /** encoding used for all textual metadata values */
    private static final String TEXT_ENCODING = "UTF-8";

    /** marker returned by {@link #storageKey(int)} for atoms we do not store */
    private static final int UNKNOWN_ATOM = 0;

    /**
     * upper bound used to sanity check size fields. While the headers are
     * walked this is the length of the file; once the [ilst] atom has been
     * entered it is the remaining size of that atom.
     *
     * NOTE: deliberately has no initializer. parseFile is presumably invoked
     * from the superclass constructor (TODO confirm), i.e. before any field
     * initializer of this class would run.
     */
    private int _maxLength;

    /**
     * Creates the metadata for the given file.
     *
     * @param f the m4a file to describe
     * @throws IOException propagated from the superclass constructor
     */
    public M4AMetaData(File f) throws IOException {
        super(f);
    }

    /**
     * Parses the given m4a file: walks the header atoms (setting length and
     * bitrate on the way), collects the metadata atoms of the [ilst] atom and
     * transfers the ones we understand into the fields of this object.
     *
     * @param f the file to parse
     * @throws IOException reading failed or the file is not laid out the way
     *         this limited parser expects
     * @see com.limegroup.gnutella.mp3.MetaData#parseFile(java.io.File)
     */
    protected void parseFile(File f) throws IOException {
        FileInputStream fin = null;
        try {
            // clamp instead of casting blindly: a plain (int) cast turns the
            // length of files larger than 2GB into a bogus (negative) bound.
            _maxLength = (int) Math.min(f.length(), (long) Integer.MAX_VALUE);
            fin = new FileInputStream(f);
            positionMetaDataStream(fin);
            Map metaData = populateMetaDataMap(fin);

            // the title, artist, album and comment tags are plain text.
            // like every data atom they start with the 8 byte prefix, which
            // must not end up in the decoded string.
            setTitle(decodeText(lookup(metaData, NAME_ATOM)));
            setArtist(decodeText(lookup(metaData, ARTIST_ATOM)));
            setAlbum(decodeText(lookup(metaData, ALBUM_ATOM)));
            setComment(decodeText(lookup(metaData, COMMENT_ATOM)));

            // the genre is byte encoded the same way as with id3 tags
            // except that the table is shifted one position
            byte[] genre = lookup(metaData, GENRE_ATOM);
            if (genre != null && genre.length >= DATA_PREFIX_LENGTH) {
                if (genre[3] == 1) {
                    // we have a custom genre.
                    setGenre(decodeText(genre));
                } else if (genre.length >= DATA_PREFIX_LENGTH + 2) {
                    short genreShort =
                        (short) (ByteOrder.beb2short(genre, genre.length - 2) - 1);
                    setGenre(MP3MetaData.getGenreString(genreShort));
                }
            }

            // the date is plaintext. Store only the year
            String year = decodeText(lookup(metaData, DATE_ATOM));
            if (year.length() > 4)
                year = year.substring(0, 4);
            setYear(year);

            // get the track # & total # of tracks on album.
            // the length check keeps the offsets below inside the value part.
            byte[] track = lookup(metaData, TRACK_ATOM);
            if (track != null && track.length >= DATA_PREFIX_LENGTH + 6) {
                short trackShort = ByteOrder.beb2short(track, track.length - 6);
                setTrack(trackShort);
                short trackTotal = ByteOrder.beb2short(track, track.length - 4);
                setTotalTracks(trackTotal);
            }

            // get the disk # & total # of disks on album
            byte[] disk = lookup(metaData, DISK_ATOM);
            if (disk != null && disk.length >= DATA_PREFIX_LENGTH + 4) {
                short diskShort = ByteOrder.beb2short(disk, disk.length - 4);
                setDisk(diskShort);
                short diskTotal = ByteOrder.beb2short(disk, disk.length - 2);
                setTotalDisks(diskTotal);
            }

            // TODO: add more fields as we discover their meaning.
        } finally {
            if (fin != null) {
                try {
                    fin.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
        }
    }

    /**
     * looks up the raw content stored for the given metadata atom.
     *
     * @param metaData the map filled by populateMetaDataMap
     * @param atomType the atom type the content was stored under
     * @return the stored bytes, or null if the atom was not found in the file
     */
    private static byte[] lookup(Map metaData, int atomType) {
        return (byte[]) metaData.get(new Integer(atomType));
    }

    /**
     * decodes the textual value of a data atom, leaving out the 8 byte prefix.
     *
     * @param data content of a data atom as returned by readDataAtom, may be null
     * @return the decoded text; the empty string if the atom is missing or
     *         carries no value
     * @throws IOException the UTF-8 encoding is not available
     */
    private static String decodeText(byte[] data) throws IOException {
        if (data == null || data.length <= DATA_PREFIX_LENGTH)
            return "";
        return new String(data, DATA_PREFIX_LENGTH,
                          data.length - DATA_PREFIX_LENGTH, TEXT_ENCODING);
    }

    /**
     * positions the stream past the current atom.
     * the current stream position must be at the beginning of the atom
     *
     * @param atomType the expected atom type, used for verification
     * @param in the <tt>DataInputStream</tt> to modify
     * @throws IOException either reading failed, or the atom type didn't match
     */
    private void skipAtom(int atomType, DataInputStream in) throws IOException {
        int remaining = enterAtom(atomType, in);
        IOUtils.ensureSkip(in, remaining);
    }

    /**
     * reads the atom header and positions the stream at the beginning
     * of the data of the atom.
     * it assumes the current position is at the beginning of the atom.
     *
     * The size is validated only after a possible 64 bit extended size has
     * been read, so the returned value is never negative. A size field of 0
     * is rejected as invalid.
     *
     * @param atomType the expected atom type, used for verification
     * @param in the <tt>DataInputStream</tt> to modify
     * @throws IOException reading failed, the atom type didn't match, or the
     *         size is smaller than the header or not smaller than _maxLength
     * @return the remaining size of the atom.
     */
    private int enterAtom(int atomType, DataInputStream in) throws IOException {
        int size = in.readInt();
        int type = in.readInt();
        if (type != atomType)
            throw new IOException ("atom type mismatch, expected " +atomType+ " got "+ type);

        long total = size;
        int headerLength = ATOM_HEADER_LENGTH;
        if (size == 1) {
            // a size of 1 announces the 64 bit extended size
            total = in.readLong();
            headerLength = EXTENDED_ATOM_HEADER_LENGTH;
        }

        if (total < headerLength || total >= _maxLength)
            throw new IOException ("invalid size field read");
        // safe: total is below _maxLength, which is an int
        return (int) (total - headerLength);
    }

    /**
     * skips through the headers of the file that we do not care about and
     * leaves the stream at the first atom inside the [ilst] atom.
     *
     * On the way the length is taken from the [mvhd] atom and the bitrate from
     * the [stsd] atom. On return _maxLength holds the remaining size of the
     * [ilst] atom. The atoms must appear in exactly the order walked here.
     *
     * @param rawIn stream positioned at the very beginning of the file
     * @throws IOException reading failed or an unexpected atom was found
     */
    private void positionMetaDataStream(InputStream rawIn) throws IOException {
        DataInputStream in = new DataInputStream(rawIn);

        skipAtom(FTYP_ATOM, in);
        enterAtom(MOOV_ATOM, in);

        // extract the length: 12 bytes are skipped, then the time scale and
        // the duration follow as two 32 bit ints (20 bytes altogether).
        int mvhdRest = enterAtom(MVHD_ATOM, in) - 20;
        if (mvhdRest < 0)
            throw new IOException("mvhd atom too short");
        IOUtils.ensureSkip(in, 12);
        // both fields are widened with a mask so they are treated as unsigned
        long timeScale = in.readInt() & 0xFFFFFFFFL;
        long timeUnits = in.readInt() & 0xFFFFFFFFL;
        if (timeScale == 0)
            throw new IOException("invalid time scale");
        setLength((int) Math.min(timeUnits / timeScale, (long) Integer.MAX_VALUE));
        IOUtils.ensureSkip(in, mvhdRest);

        // extract the bitrate.
        enterAtom(TRAK_ATOM, in);
        skipAtom(TKHD_ATOM, in);
        enterAtom(MDIA_ATOM, in);
        skipAtom(MDHD_ATOM, in);
        skipAtom(HDLR_ATOM, in);
        enterAtom(MINF_ATOM, in);
        skipAtom(SMHD_ATOM, in);
        skipAtom(DINF_ATOM, in);
        enterAtom(STBL_ATOM, in);
        enterAtom(STSD_ATOM, in);
        processSTSDAtom(in);
        skipAtom(STTS_ATOM, in);
        skipAtom(STSC_ATOM, in);
        skipAtom(STSZ_ATOM, in);
        skipAtom(STCO_ATOM, in);

        enterAtom(UDTA_ATOM, in);
        enterAtom(META_ATOM, in);
        // 4 bytes precede the child atoms of [meta]
        // (presumably its version/flags field - TODO confirm)
        IOUtils.ensureSkip(in, 4);
        skipAtom(HDLR_ATOM, in);

        // from here on sizes are checked against what is left of [ilst]
        _maxLength = enterAtom(ILST_ATOM, in);
    }

    /**
     * [stsd]
     * (1. some data whereof we are not interested in)
     * [mp4a] or [alac] (or [drms], is not supported here)
     * (2. data whereof we are not interested in)
     * [esds] or [alac]
     * (bitrate is at offset 0x1A or 0x14)
     *
     * @param in stream positioned right behind the header of the [stsd] atom
     * @throws IOException reading failed or the sample description is neither
     *         [mp4a] nor [alac]
     */
    private void processSTSDAtom(DataInputStream in) throws IOException {
        IOUtils.ensureSkip(in, 8 + 4); // (1) skip some data of [stsd]
        int atomType = in.readInt(); // [mp4a], [alac]
        IOUtils.ensureSkip(in, 0x1c); // (2) skip more data of [mp4a]...

        switch (atomType) {
        case MP4A_ATOM: // DRMS_ATOM would go here, but is not supported
            enterBitrateAtom(ESDS_ATOM, 0x1A, in);
            break;
        case ALAC_ATOM:
            enterBitrateAtom(ALAC_ATOM, 0x14, in);
            break;
        default:
            throw new IOException ("atom type mismatch, expected " +MP4A_ATOM+ " or " +ALAC_ATOM+ " got " +atomType);
        }
    }

    /**
     * Retrieve the Bitrate
     *
     * @param atom the type of the atom holding the bitrate
     * @param skip offset of the 32 bit bitrate field within the atom's data
     * @param in stream positioned at the beginning of that atom
     * @throws IOException reading failed, the atom type didn't match or the
     *         atom is too short to contain the bitrate field
     */
    private void enterBitrateAtom(int atom, int skip, DataInputStream in) throws IOException {
        int length = enterAtom(atom, in);
        length -= IOUtils.ensureSkip(in, skip);
        int avgBitrate = in.readInt();
        length -= 4;
        if (length < 0)
            throw new IOException("bitrate atom too short");
        setBitrate((int) (avgBitrate / 1000)); // bits to kbits
        IOUtils.ensureSkip(in, length); // ignore the rest of this atom
    }

    /**
     * populates the metaData map with values read from the file.
     *
     * Reads atom after atom until the end of the [ilst] atom (_maxLength
     * bytes) is reached or isComplete() reports true. The content of the data
     * atom of every known metadata atom is stored under the atom type;
     * the alternative track and genre atoms share the key of their
     * counterpart. Unknown atoms are skipped.
     *
     * @param rawIn stream positioned at the first atom inside [ilst]
     * @return a map from <tt>Integer</tt> atom type to <tt>byte[]</tt> content
     * @throws IOException parsing failed
     */
    private Map populateMetaDataMap(InputStream rawIn) throws IOException {
        Map metaData = new HashMap();
        CountingInputStream cin = new CountingInputStream(rawIn);
        DataInputStream in = new DataInputStream(cin);

        while (cin.getAmountRead() < _maxLength && !isComplete()) {
            int currentSize = in.readInt();
            // a size below the header length would lead to a negative skip
            if (currentSize < ATOM_HEADER_LENGTH || currentSize > _maxLength)
                throw new IOException("invalid file size");
            int currentType = in.readInt();

            int key = storageKey(currentType);
            if (key == UNKNOWN_ATOM) {
                // skip unknown atoms.
                IOUtils.ensureSkip(in, currentSize - ATOM_HEADER_LENGTH);
                continue;
            }

            long bodyStart = cin.getAmountRead();
            metaData.put(new Integer(key), readDataAtom(in));

            // stay aligned with the next atom even if this one holds more
            // than the single data atom we just read.
            long unread = currentSize - ATOM_HEADER_LENGTH
                          - (cin.getAmountRead() - bodyStart);
            if (unread < 0)
                throw new IOException("data atom overruns its parent atom");
            if (unread > 0)
                IOUtils.ensureSkip(in, (int) unread);
        }
        return metaData;
    }

    /**
     * maps the type of a metadata atom to the key its content is stored under.
     *
     * @param atomType the type read from the atom header
     * @return the key for the metadata map, or UNKNOWN_ATOM if the atom is
     *         not one we evaluate
     */
    private static int storageKey(int atomType) {
        switch (atomType) {
        case NAME_ATOM:
        case ARTIST_ATOM:
        case ALBUM_ATOM:
        case DATE_ATOM:
        case COMMENT_ATOM:
        case DISK_ATOM:
            return atomType;
        case TRACK_ATOM:
        case TRACK_ATOM_STANDARD:
            return TRACK_ATOM;
        case GENRE_ATOM:
        case GENRE_ATOM_STANDARD:
            return GENRE_ATOM;
        // add more atoms as we learn their meaning
        default:
            return UNKNOWN_ATOM;
        }
    }

    /**
     * reads the data atom contained in a metadata atom.
     *
     * @param in stream positioned at the beginning of the data atom
     * @return the content of the data atom, i.e. everything behind its 8 byte
     *         header. The first 8 bytes of the result are not part of the value.
     * @throws IOException the data atom was not found, its size field is
     *         smaller than the header or larger than _maxLength, or reading failed
     */
    private byte[] readDataAtom(DataInputStream in) throws IOException {
        int size = in.readInt();
        if (in.readInt() != DATA_ATOM)
            throw new IOException("data tag not found");
        // never trust the size field of a possibly corrupt file for an allocation
        if (size < ATOM_HEADER_LENGTH || size > _maxLength)
            throw new IOException("invalid data atom size");
        byte[] res = new byte[size - ATOM_HEADER_LENGTH];
        in.readFully(res);
        return res;
    }
}