package com.limegroup.gnutella.metadata; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import com.limegroup.gnutella.ByteOrder; import com.limegroup.gnutella.util.CountingInputStream; import com.limegroup.gnutella.util.IOUtils; /** * Limited metadata parsing of m4a files. This is based on code published * by Chris Adamson and released under GPL. Information about the format was * originally found on <url>http://www.oreillynet.com/pub/wlg/3130</url>. * * A great THANK YOU to Roger Kapsi for his help! * * * The m4a files have a tree structure composed of atoms. * Atoms are similar to xml tags. * * Here is the structure of a typical m4a file: * [ftyp] * [moov] * [mvhd] * (length info - 5th 32bit int divided by 4th 32 bit int) * [trak] * [udta] * [meta] * [hdlr] * [ilst] * (metadata atoms) * .... * (other atoms we don't care about) * * each metadata atom contains its data in a [data] atoms. for example, * the structure of the genre atom is: * [gnre] * [data] * (the genre of the file) * * * furthermore, each atom has a 8 or 16 byte header. * 32 bit unsigned integer size (includes header). If its 1, there is extended size. * 32 bit id * (optional) 64 bit extended size * * Although sometimes the atom names can be represented as strings, sometimes they * contain non-asccii characters, so it is safer to represent all atom names as integers. * (that's what says in the spec too) * */ public class M4AMetaData extends AudioMetaData { /** * some atoms we don't care about */ private static final int FTYP_ATOM = 0x66747970; private static final int MOOV_ATOM = 0x6d6f6f76; private static final int MVHD_ATOM = 0x6d766864; private static final int TRAK_ATOM = 0x7472616b; private static final int TKHD_ATOM = 0x746b6864; private static final int MDIA_ATOM = 0x6d646961; private static final int ESDS_ATOM = 0x65736473; private static final int ALAC_ATOM = 0x616c6163; private static final int MDHD_ATOM = 0x6d646864; private static final int MINF_ATOM = 0x6d696e66; private static final int DINF_ATOM = 0x64696e66; private static final int SMHD_ATOM = 0x736d6864; private static final int STBL_ATOM = 0x7374626c; private static final int STSD_ATOM = 0x73747364; private static final int MP4A_ATOM = 0x6d703461; private static final int DRMS_ATOM = 0x64726d73; private static final int UDTA_ATOM = 0x75647461; private static final int META_ATOM = 0x6d657461; private static final int HDLR_ATOM = 0x68646c72; private static final int STTS_ATOM = 0x73747473; private static final int STSC_ATOM = 0x73747363; private static final int STSZ_ATOM = 0x7374737a; private static final int STCO_ATOM = 0x7374636f; /** * this atom contains the metadata. */ private static final int ILST_ATOM= 0x696c7374; /** * some metadata header atoms */ private final static int NAME_ATOM = 0xa96e616d; //0xa9+ "nam" private final static int ALBUM_ATOM = 0xa9616c62; //0xa9 + "alb" private final static int ARTIST_ATOM = 0xa9415254; //0xa9 + "ART" private final static int DATE_ATOM = 0xa9646179; //0xa9 +"day" private final static int GENRE_ATOM = 0x676e7265; //"gnre" private final static int GENRE_ATOM_STANDARD = 0xA967656E; //"0xa9+"gen" private final static int TRACK_ATOM = 0x74726b6e; //"trkn" private final static int TRACK_ATOM_STANDARD = 0xA974726b; //0xa9+"trk" private final static int COMMENT_ATOM = 0xA9636D74; //'�cmt' private final static int DISK_ATOM = 0x6469736b; //"disk" /** * the data atom within each metadata atom */ private final static int DATA_ATOM = 0x64617461; //"data" private int _maxLength; public M4AMetaData(File f) throws IOException{ super(f); } /* (non-Javadoc) * @see com.limegroup.gnutella.mp3.MetaData#parseFile(java.io.File) */ protected void parseFile(File f) throws IOException { FileInputStream fin = null; try{ _maxLength=(int)f.length(); fin = new FileInputStream(f); positionMetaDataStream(fin); Map metaData = populateMetaDataMap(fin); //the title, artist album and comment tags are in string format. //so we just set them byte []current = (byte []) metaData.get(new Integer(NAME_ATOM)); setTitle(current == null ? "" : new String(current, "UTF-8")); current = (byte []) metaData.get(new Integer(ARTIST_ATOM)); setArtist(current == null ? "" : new String(current, "UTF-8")); current = (byte []) metaData.get(new Integer(ALBUM_ATOM)); setAlbum(current == null ? "" : new String(current,"UTF-8")); current = (byte []) metaData.get(new Integer(COMMENT_ATOM)); setComment(current == null ? "" : new String(current,"UTF-8")); // the genre is byte encoded the same way as with id3 tags // except that the table is shifted one position current = (byte []) metaData.get(new Integer(GENRE_ATOM)); if (current!=null) { if (current[3] == 1) { //we have a custom genre. String genre = new String(current,8,current.length-8,"UTF-8"); setGenre(genre); } else { short genreShort = (short) (ByteOrder.beb2short(current, current.length-2) -1); setGenre(MP3MetaData.getGenreString(genreShort)); } } //the date is plaintext. Store only the year current = (byte []) metaData.get(new Integer(DATE_ATOM)); if (current==null) setYear(""); else { String year = new String(current,8,current.length-8); if (year.length()>4) year = year.substring(0,4); setYear(year); } //get the track # & total # of tracks on album current = (byte []) metaData.get(new Integer(TRACK_ATOM)); if (current != null) { short trackShort = ByteOrder.beb2short(current,current.length-6); setTrack(trackShort); short trackTotal = ByteOrder.beb2short(current,current.length-4); setTotalTracks(trackTotal); } //get the disk # & total # of disks on album current = (byte []) metaData.get(new Integer(DISK_ATOM)); if (current != null) { short diskShort = ByteOrder.beb2short(current,current.length-4); setDisk(diskShort); short diskTotal = ByteOrder.beb2short(current,current.length-2); setTotalDisks(diskTotal); } //TODO: add more fields as we discover their meaning. }finally { if (fin!=null) try{fin.close();}catch(IOException ignored){} } } /** * positions the stream past the current atom. * the current stream position must be at the beginning of the atom * * @param atomType the expected atom type, used for verification * @param the <tt>DataInputStream</tt> to modify * @throws IOException either reading failed, or the atom type didn't match */ private void skipAtom(int atomType, DataInputStream in) throws IOException { IOUtils.ensureSkip(in,enterAtom(atomType,in)); } /** * reads the atom headers and positions the stream at the beginning * of the data of the atom. * it assumes the current position is at the beginning of the atom * * @param atomType the expected atom type, used for verification * @param the <tt> DataInputStream </tt> to modify * @throws IOException either reading failed, or the atom type didn't match * @return the remaining size of the atom. */ private int enterAtom(int atomType, DataInputStream in) throws IOException { boolean extended = false; int size = in.readInt(); if (size >= _maxLength) throw new IOException ("invalid size field read"); int type = in.readInt(); if (type!=atomType) throw new IOException ("atom type mismatch, expected " +atomType+ " got "+ type); if (size == 1) { extended = true; size = (int)in.readLong(); } size-= extended ? 16 : 8; return size; } /** * skips through the headers of the file that we do not care about, * loads the metadata atom into memory and returns a stream for it * * @return a <tt>DataInputStream</tt> whose source is a copy of the * atom containing the metadata atoms */ private void positionMetaDataStream(InputStream rawIn) throws IOException{ DataInputStream in = new DataInputStream(rawIn); byte []ILST = null; skipAtom(FTYP_ATOM,in); enterAtom(MOOV_ATOM,in); //extract the length. int mvhdSize = enterAtom(MVHD_ATOM,in)-20; IOUtils.ensureSkip(in,12); int timeScale = in.readInt(); int timeUnits = in.readInt(); setLength((int) ( timeUnits/timeScale)); IOUtils.ensureSkip(in,mvhdSize); //extract the bitrate. enterAtom(TRAK_ATOM, in); skipAtom(TKHD_ATOM, in); enterAtom(MDIA_ATOM, in); skipAtom(MDHD_ATOM, in); skipAtom(HDLR_ATOM, in); enterAtom(MINF_ATOM, in); skipAtom(SMHD_ATOM, in); skipAtom(DINF_ATOM, in); enterAtom(STBL_ATOM, in); enterAtom(STSD_ATOM, in); processSTSDAtom(in); skipAtom(STTS_ATOM, in); skipAtom(STSC_ATOM, in); skipAtom(STSZ_ATOM, in); skipAtom(STCO_ATOM, in); enterAtom(UDTA_ATOM,in); enterAtom(META_ATOM,in); IOUtils.ensureSkip(in,4); //no comment... skipAtom(HDLR_ATOM,in); _maxLength = enterAtom(ILST_ATOM,in); } /** * [stsd] * (1. some data whereof we are not interested in) * [mp4a] or [alac] (or [drms], is not supported here) * (2. data whereof we are not interested in) * [esds] or [alac] * (bitrate is at offset 0x1A or 0x14) * */ private void processSTSDAtom(DataInputStream in) throws IOException { IOUtils.ensureSkip(in,8+4); // (1) skip some data of [stsd] int atomType = in.readInt(); // [mp4a], [alac] IOUtils.ensureSkip(in,0x1c); // (2) skip more data of [mp4a]... if (atomType == MP4A_ATOM) { // || atomType == DRMS_ATOM enterBitrateAtom(ESDS_ATOM, 0x1A, in); } else if (atomType == ALAC_ATOM) { enterBitrateAtom(ALAC_ATOM, 0x14, in); } else { throw new IOException ("atom type mismatch, expected " +MP4A_ATOM+ " or " +ALAC_ATOM+ " got " +atomType); } } /** * Retrieve the Bitrate */ private void enterBitrateAtom(int atom, int skip, DataInputStream in) throws IOException { int length = enterAtom(atom, in); length -= IOUtils.ensureSkip(in,skip); int avgBitrate = in.readInt(); length -= 4; setBitrate((int)(avgBitrate/1000)); // bits to kbits IOUtils.ensureSkip(in,length); // ignore the rest of this atom } /** * populates the metaData map with values read from the file * @throws IOException parsing failed */ private Map populateMetaDataMap(InputStream rawIn) throws IOException { Map metaData = new HashMap(); CountingInputStream cin = new CountingInputStream(rawIn); DataInputStream in = new DataInputStream(cin); while (cin.getAmountRead() < _maxLength && !isComplete()) { int currentSize = in.readInt(); if (currentSize > _maxLength) throw new IOException("invalid file size"); int currentType = in.readInt(); switch(currentType) { case NAME_ATOM : metaData.put(new Integer(NAME_ATOM), readDataAtom(in));break; case ARTIST_ATOM : metaData.put(new Integer(ARTIST_ATOM), readDataAtom(in));break; case ALBUM_ATOM : metaData.put(new Integer(ALBUM_ATOM), readDataAtom(in));break; case TRACK_ATOM : case TRACK_ATOM_STANDARD: metaData.put(new Integer(TRACK_ATOM), readDataAtom(in));break; case GENRE_ATOM : case GENRE_ATOM_STANDARD: metaData.put(new Integer(GENRE_ATOM), readDataAtom(in));break; case DATE_ATOM: metaData.put(new Integer(DATE_ATOM), readDataAtom(in));break; case COMMENT_ATOM: metaData.put(new Integer(COMMENT_ATOM), readDataAtom(in));break; case DISK_ATOM: metaData.put(new Integer(DISK_ATOM), readDataAtom(in));break; //add more atoms as we learn their meaning default: //skip unknown atoms. IOUtils.ensureSkip(in,currentSize-8); } } return metaData; } /** * reads the data atom contained in a metadata atom. * @return the content of the data atom * @throws IOException the data atom was not found or error occured */ private byte[] readDataAtom(DataInputStream in) throws IOException{ int size = in.readInt(); if (in.readInt() != DATA_ATOM) throw new IOException("data tag not found"); byte [] res = new byte[size-8]; //_in.skip(8); in.readFully(res); return res; } }