package org.red5.io.mp4;
/*
* RED5 Open Source Flash Server - http://code.google.com/p/red5/
*
* Copyright (c) 2006-2007 by respective authors (see below). All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
* Foundation; either version 2.1 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with this library; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
//import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.mina.core.buffer.IoBuffer;
import org.red5.io.IStreamableFile;
import org.red5.io.ITag;
import org.red5.io.ITagReader;
import org.red5.io.IoConstants;
import org.red5.io.amf.Output;
import org.red5.io.flv.IKeyFrameDataAnalyzer;
import org.red5.io.flv.Tag;
import org.red5.io.mp4.MP4Atom.CompositionTimeSampleRecord;
import org.red5.io.object.Serializer;
//import org.red5.io.utils.HexDump;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This reader is used to read the contents of an MP4 file.
*
* NOTE: This class is not implemented as thread-safe, the caller
* should ensure the thread-safety.
* <p>
* New NetStream notifications
* <br />
* Two new notifications facilitate the implementation of the playback components:
* <ul>
* <li>NetStream.Play.FileStructureInvalid: This event is sent if the player detects
* an MP4 with an invalid file structure. Flash Player cannot play files that have
* invalid file structures.</li>
* <li>NetStream.Play.NoSupportedTrackFound: This event is sent if the player does not
* detect any supported tracks. If there aren't any supported video, audio or data
* tracks found, Flash Player does not play the file.</li>
* </ul>
* </p>
*
* @author The Red5 Project (red5@osflash.org)
* @author Paul Gregoire (mondain@gmail.com)
*/
public class MP4Reader implements IoConstants, ITagReader, IKeyFrameDataAnalyzer {
/**
* Class-wide SLF4J logger.
*/
private static Logger log = LoggerFactory.getLogger(MP4Reader.class);
/** Audio packet prefix (af 01) */
public final static byte[] PREFIX_AUDIO_FRAME = new byte[] { (byte) 0xaf, (byte) 0x01 };
/** Audio config aac main (0a 10) */
public final static byte[] AUDIO_CONFIG_FRAME_AAC_MAIN = new byte[] { (byte) 0x0a, (byte) 0x10 };
/** Audio config aac lc (12 10); written by createPreStreamingTags when no audio decoder bytes were found */
public final static byte[] AUDIO_CONFIG_FRAME_AAC_LC = new byte[] { (byte) 0x12, (byte) 0x10 };
/** Audio config sbr (13 90 56 e5 a5 48 00) */
public final static byte[] AUDIO_CONFIG_FRAME_SBR = new byte[] { (byte) 0x13, (byte) 0x90, (byte) 0x56, (byte) 0xe5, (byte) 0xa5, (byte) 0x48, (byte) 0x00 };
/** Video packet prefix for the decoder frame (17 00 00 00 00); written in front of the video decoder bytes */
public final static byte[] PREFIX_VIDEO_CONFIG_FRAME = new byte[] { (byte) 0x17, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00 };
/** Video packet prefix for key frames (17 01) */
public final static byte[] PREFIX_VIDEO_KEYFRAME = new byte[] { (byte) 0x17, (byte) 0x01 };
/** Video packet prefix for standard frames (interframe) (27 01) */
public final static byte[] PREFIX_VIDEO_FRAME = new byte[] { (byte) 0x27, (byte) 0x01 };
/**
* File handed to the constructor; also used as a fallback size source in getTotalBytes.
*/
private File file;
/**
* Data stream wrapped around the file's input stream; all atoms are read from it.
*/
private MP4DataStream fis;
/**
* File channel obtained from the data stream; used for size and position queries.
*/
private FileChannel channel;
/** Mapping between file position and timestamp in ms. */
private HashMap<Integer, Long> timePosMap;
// NOTE(review): not filled in by the code visible up to createPreStreamingTags;
// presumably maps a sample/frame index to its file position - confirm against the frame analysis
private HashMap<Integer, Long> samplePosMap;
/** Whether or not the clip contains a video track (set when a "vide" handler is found) */
private boolean hasVideo = false;
/** Whether or not the clip contains an audio track (set when a "soun" handler is found) */
private boolean hasAudio = false;
//default video codec; replaced by the avc1 / mp4v sample entry type found in the stsd atom
private String videoCodecId = "avc1";
//default audio codec; replaced by the type of the first audio sample entry in the stsd atom
private String audioCodecId = "mp4a";
//decoder bytes / configs; sent in the pre-streaming (decoder config) tags
private byte[] audioDecoderBytes;
private byte[] videoDecoderBytes;
// duration in milliseconds
// NOTE(review): decodeHeader stores the raw mvhd duration here and createFileMeta
// later multiplies it by 1000, so the unit depends on when the field is read - confirm
private long duration;
// movie time scale (from the mvhd atom)
private int timeScale;
// display size taken from a tkhd atom whose width is greater than zero
private int width;
private int height;
//audio sample rate kHz
// NOTE(review): decodeHeader stores the unscaled time scale of the audio track / sample entry
// here, which is presumably Hz rather than kHz - confirm
private double audioTimeScale;
// channel count reported by the audio sample entry
private int audioChannels;
//default to aac lc; decodeHeader sets 0 = AAC Main, 1 = AAC LC, 2 = SBR
private int audioCodecType = 1;
// number of entries in the video sample size table
private int videoSampleCount;
// frames per second derived in decodeHeader from sample count, time scale and duration
private double fps;
// time scale of the video track (from its mdhd atom)
private double videoTimeScale;
// AVC level / profile read from the avcC atom
private int avcLevel;
private int avcProfile;
// duration rendered as text in decodeHeader (for logging)
private String formattedDuration;
// offsets of the moov / mdat atoms; decodeHeader adds 8 to each after the atoms are located
private long moovOffset;
private long mdatOffset;
//samples to chunk mappings (stsc records)
private Vector<MP4Atom.Record> videoSamplesToChunks;
private Vector<MP4Atom.Record> audioSamplesToChunks;
//keyframe - sample numbers (stss); stays null when the track has no stss atom
private Vector<Integer> syncSamples;
//samples (stsz); cleared and set to null by createFileMeta once the track length is computed
private Vector<Integer> videoSamples;
private Vector<Integer> audioSamples;
//chunk offsets (stco)
private Vector<Long> videoChunkOffsets;
private Vector<Long> audioChunkOffsets;
//sample duration; defaults are replaced by the duration of the first stts record
private int videoSampleDuration = 125;
private int audioSampleDuration = 1024;
//keep track of current frame / sample; currentFrame indexes into the frames list
private int currentFrame = 0;
// NOTE(review): not referenced in the visible part of the class - confirm it is still needed
private int prevFrameSize = 0;
// timestamp of the previous video frame; -1 means no video frame has been read yet
private int prevVideoTS = -1;
// frames served by readTag in list order
// NOTE(review): filled outside the visible code, presumably by analyzeFrames - confirm
private List<MP4Frame> frames = new ArrayList<MP4Frame>();
// NOTE(review): counters are not touched in the visible code; presumably number of
// audio / video frames handled so far - confirm
private long audioCount;
private long videoCount;
// composition time to sample entries (ctts records)
private Vector<MP4Atom.CompositionTimeSampleRecord> compositionTimes;
/**
* Container for metadata and any other tags that should
* be sent prior to media data.
*/
private LinkedList<ITag> firstTags = new LinkedList<ITag>();
/**
* Container for seek points in the video. These are the time stamps
* for the key frames. Stays null until seek points have been collected.
*/
private LinkedList<Integer> seekPoints;
/**
* Constructs a new MP4Reader without opening a file.
* <p>
* Nothing is initialised here, so the file, data stream and channel fields stay {@code null}.
* </p>
*/
MP4Reader() {
}
/**
* Creates MP4 reader from file input stream, sets up metadata generation flag.
*
* @param f File input stream
*/
public MP4Reader(File f) throws IOException {
if (null == f) {
log.warn("Reader was passed a null file");
log.debug("{}", ToStringBuilder.reflectionToString(this));
}
this.file = f;
this.fis = new MP4DataStream(new FileInputStream(f));
channel = fis.getChannel();
//decode all the info that we want from the atoms
decodeHeader();
//analyze the samples/chunks and build the keyframe meta data
analyzeFrames();
//add meta data
firstTags.add(createFileMeta());
//create / add the pre-streaming (decoder config) tags
createPreStreamingTags(0, false);
}
/**
 * Decodes the top-level atoms of the file and collects the track information needed for playback.
 * <p>
 * This handles the moov atom being at the beginning or end of the file, so the mdat may also
 * be before or after the moov atom. Atoms are read until one moov and one mdat atom have been
 * seen; free and wide atoms are skipped and any other atom is logged as unexpected.
 * An {@link IOException} raised while reading is logged and not rethrown.
 * </p>
 */
public void decodeHeader() {
    try {
        // the first atom will/should be the type (ftyp)
        MP4Atom type = MP4Atom.createAtom(fis);
        log.debug("Type {}", MP4Atom.intToType(type.getType()));
        // keep a running count of the number of atoms found at the "top" levels
        int topAtoms = 0;
        // we want a moov and an mdat
        while (topAtoms < 2) {
            MP4Atom atom = MP4Atom.createAtom(fis);
            switch (atom.getType()) {
                case 1836019574: //moov
                    topAtoms++;
                    decodeMoovAtom(atom);
                    break;
                case 1835295092: //mdat
                    topAtoms++;
                    long dataSize = atom.getSize();
                    log.debug("{}", ToStringBuilder.reflectionToString(atom));
                    mdatOffset = fis.getOffset() - dataSize;
                    log.debug("File size: {} mdat size: {}", file.length(), dataSize);
                    break;
                case 1718773093: //free
                case 2003395685: //wide
                    break;
                default:
                    log.warn("Unexpected atom: {}", MP4Atom.intToType(atom.getType()));
            }
        }
        //add the tag name (size) to the offsets
        moovOffset += 8;
        mdatOffset += 8;
        log.debug("Offsets moov: {} mdat: {}", moovOffset, mdatOffset);
    } catch (IOException e) {
        log.error("Exception decoding header / atoms", e);
    }
}

/**
 * Reads the movie header and up to three tracks from the moov atom, then derives the
 * frame rate and the formatted duration.
 */
private void decodeMoovAtom(MP4Atom moov) throws IOException {
    log.debug("Type {}", MP4Atom.intToType(moov.getType()));
    log.debug("moov children: {}", moov.getChildren());
    moovOffset = fis.getOffset() - moov.getSize();
    MP4Atom mvhd = moov.lookup(MP4Atom.typeToInt("mvhd"), 0);
    if (mvhd != null) {
        log.debug("Movie header atom found");
        //get the initial timescale
        timeScale = mvhd.getTimeScale();
        duration = mvhd.getDuration();
        log.debug("Time scale {} Duration {}", timeScale, duration);
    }
    //we would like to have two tracks, but it shouldn't be a requirement
    int loops = 0;
    int tracks = 0;
    do {
        MP4Atom trak = moov.lookup(MP4Atom.typeToInt("trak"), loops);
        if (trak != null && decodeTrackAtom(trak)) {
            tracks++;
        }
        loops++;
    } while (loops < 3);
    log.trace("Busted out of track loop with {} tracks after {} loops", tracks, loops);
    //calculate FPS - multiply as long so large sample counts / time scales cannot overflow,
    //and report 0 instead of NaN / Infinity when the duration is unknown
    fps = duration > 0 ? ((long) videoSampleCount * timeScale) / (double) duration : 0d;
    log.debug("FPS calc: ({} * {}) / {}", new Object[] { videoSampleCount, timeScale, duration });
    log.debug("FPS: {}", fps);
    //real duration
    StringBuilder sb = new StringBuilder();
    double videoTime = ((double) duration / (double) timeScale);
    log.debug("Video time: {}", videoTime);
    int minutes = (int) (videoTime / 60);
    if (minutes > 0) {
        sb.append(minutes);
        sb.append('.');
    }
    //formatter for seconds / millis
    NumberFormat df = DecimalFormat.getInstance();
    df.setMaximumFractionDigits(2);
    sb.append(df.format((videoTime % 60)));
    formattedDuration = sb.toString();
    log.debug("Time: {}", formattedDuration);
}

/**
 * Decodes a single trak atom: display size, media time scale, handler type and the
 * audio / video media information. Returns true when a handler reference atom was found.
 */
private boolean decodeTrackAtom(MP4Atom trak) throws IOException {
    log.debug("Track atom found");
    log.debug("trak children: {}", trak.getChildren());
    // trak: tkhd, edts, mdia
    MP4Atom tkhd = trak.lookup(MP4Atom.typeToInt("tkhd"), 0);
    if (tkhd != null) {
        log.debug("Track header atom found");
        log.debug("tkhd children: {}", tkhd.getChildren());
        if (tkhd.getWidth() > 0) {
            width = tkhd.getWidth();
            height = tkhd.getHeight();
            log.debug("Width {} x Height {}", width, height);
        }
    }
    MP4Atom edts = trak.lookup(MP4Atom.typeToInt("edts"), 0);
    if (edts != null) {
        log.debug("Edit atom found");
        log.debug("edts children: {}", edts.getChildren());
    }
    MP4Atom mdia = trak.lookup(MP4Atom.typeToInt("mdia"), 0);
    if (mdia == null) {
        return false;
    }
    log.debug("Media atom found");
    // mdia: mdhd, hdlr, minf
    int scale = 0;
    //get the media header atom
    MP4Atom mdhd = mdia.lookup(MP4Atom.typeToInt("mdhd"), 0);
    if (mdhd != null) {
        log.debug("Media data header atom found");
        //this will be for either video or audio depending media info
        scale = mdhd.getTimeScale();
        log.debug("Time scale {}", scale);
    }
    boolean handlerFound = false;
    MP4Atom hdlr = mdia.lookup(MP4Atom.typeToInt("hdlr"), 0);
    if (hdlr != null) {
        log.debug("Handler ref atom found");
        // soun or vide
        String hdlrType = MP4Atom.intToType(hdlr.getHandlerType());
        log.debug("Handler type: {}", hdlrType);
        if ("vide".equals(hdlrType)) {
            hasVideo = true;
            if (scale > 0) {
                videoTimeScale = scale * 1.0;
                log.debug("Video time scale: {}", videoTimeScale);
            }
        } else if ("soun".equals(hdlrType)) {
            hasAudio = true;
            if (scale > 0) {
                audioTimeScale = scale * 1.0;
                log.debug("Audio time scale: {}", audioTimeScale);
            }
        }
        handlerFound = true;
    }
    MP4Atom minf = mdia.lookup(MP4Atom.typeToInt("minf"), 0);
    if (minf != null) {
        log.debug("Media info atom found");
        // minf: (audio) smhd, dinf, stbl / (video) vmhd, dinf, stbl
        if (minf.lookup(MP4Atom.typeToInt("smhd"), 0) != null) {
            log.debug("Sound header atom found");
            decodeAudioMediaInfo(minf);
        }
        if (minf.lookup(MP4Atom.typeToInt("vmhd"), 0) != null) {
            log.debug("Video header atom found");
            decodeVideoMediaInfo(minf, scale);
        }
    }
    return handlerFound;
}

/**
 * Reads the audio sample table (stsd, stsc, stsz, stco, stts) below the given minf atom.
 */
private void decodeAudioMediaInfo(MP4Atom minf) throws IOException {
    MP4Atom dinf = minf.lookup(MP4Atom.typeToInt("dinf"), 0);
    if (dinf != null) {
        log.debug("Data info atom found");
        // dinf: dref
        log.debug("Sound dinf children: {}", dinf.getChildren());
        MP4Atom dref = dinf.lookup(MP4Atom.typeToInt("dref"), 0);
        if (dref != null) {
            log.debug("Data reference atom found");
        }
    }
    MP4Atom stbl = minf.lookup(MP4Atom.typeToInt("stbl"), 0);
    if (stbl == null) {
        return;
    }
    log.debug("Sample table atom found");
    // stbl: stsd, stts, stss, stsc, stsz, stco, stsh
    log.debug("Sound stbl children: {}", stbl.getChildren());
    //stsd - sample description, has codec child
    MP4Atom stsd = stbl.lookup(MP4Atom.typeToInt("stsd"), 0);
    if (stsd != null) {
        decodeAudioSampleDescription(stsd);
    }
    //stsc - sample to chunk, has Records
    MP4Atom stsc = stbl.lookup(MP4Atom.typeToInt("stsc"), 0);
    if (stsc != null) {
        log.debug("Sample to chunk atom found");
        audioSamplesToChunks = stsc.getRecords();
        log.debug("Record count: {}", audioSamplesToChunks.size());
        //an empty table has no first record to report
        if (!audioSamplesToChunks.isEmpty()) {
            MP4Atom.Record rec = audioSamplesToChunks.firstElement();
            log.debug("Record data: Description index={} Samples per chunk={}", rec.getSampleDescriptionIndex(), rec.getSamplesPerChunk());
        }
    }
    //stsz - sample size, has Samples
    MP4Atom stsz = stbl.lookup(MP4Atom.typeToInt("stsz"), 0);
    if (stsz != null) {
        log.debug("Sample size atom found");
        //vector full of integers
        audioSamples = stsz.getSamples();
        log.debug("Sample size: {}", stsz.getSampleSize());
        log.debug("Sample count: {}", audioSamples.size());
    }
    //stco - chunk offset, has Chunks
    MP4Atom stco = stbl.lookup(MP4Atom.typeToInt("stco"), 0);
    if (stco != null) {
        log.debug("Chunk offset atom found");
        //vector full of integers
        audioChunkOffsets = stco.getChunks();
        log.debug("Chunk count: {}", audioChunkOffsets.size());
    }
    //stts - time to sample, has TimeSampleRecords
    MP4Atom stts = stbl.lookup(MP4Atom.typeToInt("stts"), 0);
    if (stts != null) {
        log.debug("Time to sample atom found");
        Vector<MP4Atom.TimeSampleRecord> records = stts.getTimeToSamplesRecords();
        log.debug("Record count: {}", records.size());
        //keep the default sample duration when the table is empty
        if (!records.isEmpty()) {
            MP4Atom.TimeSampleRecord rec = records.firstElement();
            log.debug("Record data: Consecutive samples={} Duration={}", rec.getConsecutiveSamples(), rec.getSampleDuration());
            //if we have 1 record then all samples have the same duration
            if (records.size() > 1) {
                //TODO: handle audio samples with varying durations
                log.info("Audio samples have differing durations, audio playback may fail");
            }
            audioSampleDuration = rec.getSampleDuration();
        }
    }
}

/**
 * Reads codec id, sample rate, channel count and decoder config from the first audio sample entry.
 */
private void decodeAudioSampleDescription(MP4Atom stsd) throws IOException {
    //stsd: mp4a
    log.debug("Sample description atom found");
    if (stsd.getChildren().size() == 0) {
        log.warn("Audio sample description atom has no entries");
        return;
    }
    MP4Atom mp4a = stsd.getChildren().get(0);
    //could set the audio codec here
    setAudioCodecId(MP4Atom.intToType(mp4a.getType()));
    log.debug("Sample size: {}", mp4a.getSampleSize());
    int ats = mp4a.getTimeScale();
    //skip invalid audio time scale
    if (ats > 0) {
        audioTimeScale = ats * 1.0;
    }
    audioChannels = mp4a.getChannelCount();
    log.debug("Sample rate (audio time scale): {}", audioTimeScale);
    log.debug("Channels: {}", audioChannels);
    //mp4a: esds
    if (mp4a.getChildren().size() > 0) {
        log.debug("Elementary stream descriptor atom found");
        MP4Atom esds = mp4a.getChildren().get(0);
        log.debug("{}", ToStringBuilder.reflectionToString(esds));
        MP4Descriptor descriptor = esds.getEsd_descriptor();
        log.debug("{}", ToStringBuilder.reflectionToString(descriptor));
        byte[] config = findDecoderSpecificInfo(descriptor);
        //missing or empty config leaves the defaults (AAC LC) in place
        if (config != null && config.length > 0) {
            audioDecoderBytes = config;
            //match first byte to get the aacaot/aottype
            switch (config[0]) {
                case 0x0a:
                    //AAC Main - 0A 10
                    audioCodecType = 0;
                    break;
                case 0x11:
                case 0x13:
                    //AAC LC SBR - 11 90 & 13 xx
                    audioCodecType = 2;
                    break;
                case 0x12:
                default:
                    //AAC LC - 12 10
                    audioCodecType = 1;
                    break;
            }
        }
    }
}

/**
 * Reads the video sample table (stsd, stsc, stsz, stco, stss, stts, ctts) below the given minf atom.
 */
private void decodeVideoMediaInfo(MP4Atom minf, int scale) throws IOException {
    MP4Atom dinf = minf.lookup(MP4Atom.typeToInt("dinf"), 0);
    if (dinf != null) {
        log.debug("Data info atom found");
        // dinf: dref
        log.debug("Video dinf children: {}", dinf.getChildren());
        MP4Atom dref = dinf.lookup(MP4Atom.typeToInt("dref"), 0);
        if (dref != null) {
            log.debug("Data reference atom found");
        }
    }
    MP4Atom stbl = minf.lookup(MP4Atom.typeToInt("stbl"), 0);
    if (stbl == null) {
        return;
    }
    log.debug("Sample table atom found");
    // stbl: stsd, stts, stss, stsc, stsz, stco, stsh
    log.debug("Video stbl children: {}", stbl.getChildren());
    //stsd - sample description, has codec child
    MP4Atom stsd = stbl.lookup(MP4Atom.typeToInt("stsd"), 0);
    if (stsd != null) {
        decodeVideoSampleDescription(stsd);
    }
    //stsc - sample to chunk, has Records
    MP4Atom stsc = stbl.lookup(MP4Atom.typeToInt("stsc"), 0);
    if (stsc != null) {
        log.debug("Sample to chunk atom found");
        videoSamplesToChunks = stsc.getRecords();
        log.debug("Record count: {}", videoSamplesToChunks.size());
        //an empty table has no first record to report
        if (!videoSamplesToChunks.isEmpty()) {
            MP4Atom.Record rec = videoSamplesToChunks.firstElement();
            log.debug("Record data: Description index={} Samples per chunk={}", rec.getSampleDescriptionIndex(), rec.getSamplesPerChunk());
        }
    }
    //stsz - sample size, has Samples
    MP4Atom stsz = stbl.lookup(MP4Atom.typeToInt("stsz"), 0);
    if (stsz != null) {
        log.debug("Sample size atom found");
        //vector full of integers
        videoSamples = stsz.getSamples();
        //if sample size is 0 then the table must be checked due
        //to variable sample sizes
        log.debug("Sample size: {}", stsz.getSampleSize());
        videoSampleCount = videoSamples.size();
        log.debug("Sample count: {}", videoSampleCount);
    }
    //stco - chunk offset, has Chunks
    MP4Atom stco = stbl.lookup(MP4Atom.typeToInt("stco"), 0);
    if (stco != null) {
        log.debug("Chunk offset atom found");
        //vector full of integers
        videoChunkOffsets = stco.getChunks();
        log.debug("Chunk count: {}", videoChunkOffsets.size());
    }
    //stss - has Sync - no sync means all samples are keyframes
    MP4Atom stss = stbl.lookup(MP4Atom.typeToInt("stss"), 0);
    if (stss != null) {
        log.debug("Sync sample atom found");
        //vector full of integers
        syncSamples = stss.getSyncSamples();
        log.debug("Keyframes: {}", syncSamples.size());
    }
    //stts - (decoding) time to sample, has TimeSampleRecords
    MP4Atom stts = stbl.lookup(MP4Atom.typeToInt("stts"), 0);
    if (stts != null) {
        log.debug("Time to sample atom found");
        Vector<MP4Atom.TimeSampleRecord> records = stts.getTimeToSamplesRecords();
        log.debug("Record count: {}", records.size());
        //keep the default sample duration when the table is empty
        if (!records.isEmpty()) {
            MP4Atom.TimeSampleRecord rec = records.firstElement();
            log.debug("Record data: Consecutive samples={} Duration={}", rec.getConsecutiveSamples(), rec.getSampleDuration());
            //if we have 1 record then all samples have the same duration
            if (records.size() > 1) {
                //TODO: handle video samples with varying durations
                log.info("Video samples have differing durations, video playback may fail");
            }
            videoSampleDuration = rec.getSampleDuration();
        }
    }
    //ctts - (composition) time to sample
    MP4Atom ctts = stbl.lookup(MP4Atom.typeToInt("ctts"), 0);
    if (ctts != null) {
        log.debug("Composition time to sample atom found");
        //vector full of integers
        compositionTimes = ctts.getCompositionTimeToSamplesRecords();
        log.debug("Record count: {}", compositionTimes.size());
        // NOTE(review): the offsets are rescaled to milliseconds only while trace logging
        // is enabled, so the stored records differ with the log level - confirm which form
        // the consumers of compositionTimes expect before changing this
        if (log.isTraceEnabled()) {
            for (MP4Atom.CompositionTimeSampleRecord rec : compositionTimes) {
                double offset = rec.getSampleOffset();
                if (scale > 0) {
                    offset = (offset / (double) scale) * 1000.0;
                    rec.setSampleOffset((int) offset);
                }
                log.trace("Record data: Consecutive samples={} Offset={}", rec.getConsecutiveSamples(), rec.getSampleOffset());
            }
        }
    }
}

/**
 * Reads the video codec id and decoder config from an avc1 or, failing that, mp4v sample entry.
 */
private void decodeVideoSampleDescription(MP4Atom stsd) throws IOException {
    log.debug("Sample description atom found");
    log.debug("Sample description (video) stsd children: {}", stsd.getChildren());
    MP4Atom avc1 = stsd.lookup(MP4Atom.typeToInt("avc1"), 0);
    if (avc1 != null) {
        log.debug("AVC1 children: {}", avc1.getChildren());
        //set the video codec here - may be avc1 or mp4v
        setVideoCodecId(MP4Atom.intToType(avc1.getType()));
        //video decoder config
        //TODO may need to be generic later
        MP4Atom codecChild = avc1.lookup(MP4Atom.typeToInt("avcC"), 0);
        if (codecChild == null) {
            //quicktime and ipods use a pixel aspect atom
            //since we have no avcC check for this and avcC may
            //be a child
            MP4Atom pasp = avc1.lookup(MP4Atom.typeToInt("pasp"), 0);
            if (pasp != null) {
                log.debug("PASP children: {}", pasp.getChildren());
                codecChild = pasp.lookup(MP4Atom.typeToInt("avcC"), 0);
            }
        }
        if (codecChild != null) {
            applyAvcConfig(codecChild);
        }
    } else {
        //look for mp4v
        MP4Atom mp4v = stsd.lookup(MP4Atom.typeToInt("mp4v"), 0);
        if (mp4v != null) {
            log.debug("MP4V children: {}", mp4v.getChildren());
            //set the video codec here - may be avc1 or mp4v
            setVideoCodecId(MP4Atom.intToType(mp4v.getType()));
            //look for esds
            MP4Atom codecChild = mp4v.lookup(MP4Atom.typeToInt("esds"), 0);
            if (codecChild != null) {
                //look for descriptors
                MP4Descriptor descriptor = codecChild.getEsd_descriptor();
                log.debug("{}", ToStringBuilder.reflectionToString(descriptor));
                byte[] config = findDecoderSpecificInfo(descriptor);
                if (config != null) {
                    if (config.length >= 8) {
                        //the first 8 bytes are not part of the decoder config
                        videoDecoderBytes = new byte[config.length - 8];
                        System.arraycopy(config, 8, videoDecoderBytes, 0, videoDecoderBytes.length);
                        log.debug("Video config bytes: {}", ToStringBuilder.reflectionToString(videoDecoderBytes));
                    } else {
                        log.warn("MP4V decoder specific info is too short ({} bytes), ignoring it", config.length);
                    }
                }
            }
        }
    }
    log.debug("{}", ToStringBuilder.reflectionToString(avc1));
}

/**
 * Copies AVC level, profile and decoder config bytes out of an avcC atom.
 */
private void applyAvcConfig(MP4Atom avcC) throws IOException {
    avcLevel = avcC.getAvcLevel();
    log.debug("AVC level: {}", avcLevel);
    avcProfile = avcC.getAvcProfile();
    log.debug("AVC Profile: {}", avcProfile);
    log.debug("AVCC size: {}", avcC.getSize());
    videoDecoderBytes = avcC.getVideoConfigBytes();
    log.debug("Video config bytes: {}", ToStringBuilder.reflectionToString(videoDecoderBytes));
}

/**
 * Returns the bytes of the first decoder specific info descriptor found two levels below
 * the given descriptor, or null when the descriptor is null or holds no such entry.
 */
private byte[] findDecoderSpecificInfo(MP4Descriptor descriptor) throws IOException {
    if (descriptor == null) {
        return null;
    }
    Vector<MP4Descriptor> children = descriptor.getChildren();
    for (int e = 0; e < children.size(); e++) {
        MP4Descriptor descr = children.get(e);
        log.debug("{}", ToStringBuilder.reflectionToString(descr));
        if (descr.getChildren().size() > 0) {
            Vector<MP4Descriptor> children2 = descr.getChildren();
            for (int e2 = 0; e2 < children2.size(); e2++) {
                MP4Descriptor descr2 = children2.get(e2);
                log.debug("{}", ToStringBuilder.reflectionToString(descr2));
                //we only want the MP4DecSpecificInfoDescriptorTag
                if (descr2.getType() == MP4Descriptor.MP4DecSpecificInfoDescriptorTag) {
                    return descr2.getDSID();
                }
            }
        }
    }
    return null;
}
/**
 * Reports the total number of readable bytes.
 * <p>
 * The size is taken from the file channel. When that query fails the error is logged
 * and the length of the file is used instead, or 0 when no file is set.
 * </p>
 *
 * @return Total readable bytes
 */
public long getTotalBytes() {
    long total;
    try {
        total = channel.size();
    } catch (Exception ex) {
        log.error("Error getTotalBytes", ex);
        //fall back to the file size when there is a file
        total = (file != null) ? file.length() : 0;
    }
    return total;
}
/**
 * Reports the current position of the file channel.
 * <p>
 * When the channel sits at the end of the file it is first moved back to the mdat offset.
 * Any failure is logged and reported as position 0.
 * </p>
 *
 * @return Current position in a file
 */
private long getCurrentPosition() {
    long position;
    try {
        final boolean atEndOfFile = channel.position() == channel.size();
        if (atEndOfFile) {
            //rewind to the start of the media data
            log.debug("Reached end of file, going back to data offset");
            channel.position(mdatOffset);
        }
        position = channel.position();
    } catch (Exception ex) {
        log.error("Error getCurrentPosition", ex);
        position = 0;
    }
    return position;
}
/**
 * {@inheritDoc}
 * <p>
 * Reflects the flag raised while decoding the header when a video handler was found.
 * </p>
 */
public boolean hasVideo() {
    return this.hasVideo;
}
/**
* Returns the file buffer.
* <p>
* This implementation does not expose the file contents and always returns {@code null}.
* </p>
*
* @return always {@code null}
*/
public IoBuffer getFileData() {
// TODO as of now, return null will disable cache
// we need to redesign the cache architecture so that
// the cache is layered underneath the reader not above it,
// thus both tag cache and file cache are feasible.
return null;
}
/** {@inheritDoc}
* <p>
* This implementation keeps no streamable file reference and always returns {@code null}.
* </p>
*/
public IStreamableFile getFile() {
// TODO wondering if we need to have a reference
return null;
}
/** {@inheritDoc}
* <p>
* This implementation does not track an offset and always returns 0.
* </p>
*/
public int getOffset() {
// XXX what's the difference from getBytesRead
return 0;
}
/**
 * {@inheritDoc}
 * <p>
 * Reported as the current position of the file channel.
 * </p>
 */
public long getBytesRead() {
    // XXX should summarize the total bytes read or
    // just the current position?
    final long position = getCurrentPosition();
    return position;
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the duration field as it currently stands.
 * </p>
 */
public long getDuration() {
    return this.duration;
}
/**
 * Returns the id of the video codec.
 *
 * @return Video codec id, "avc1" unless another id has been set
 */
public String getVideoCodecId() {
    return this.videoCodecId;
}
/**
 * Returns the id of the audio codec.
 *
 * @return Audio codec id, "mp4a" unless another id has been set
 */
public String getAudioCodecId() {
    return this.audioCodecId;
}
/**
 * {@inheritDoc}
 * <p>
 * True while the current frame index is still inside the frame list.
 * </p>
 */
public boolean hasMoreTags() {
    final int total = frames.size();
    return total > currentFrame;
}
/**
 * Create tag for metadata event.
 * <p>
 * Builds the onMetaData property map from the values collected while decoding the header,
 * serializes it and wraps it in a metadata tag. As side effects the audio / video sample
 * size vectors are cleared and released, and the duration field is multiplied by 1000
 * once the map is written, so this method is not meant to be called twice.
 * </p>
 *
 * Info from http://www.kaourantin.net/2007/08/what-just-happened-to-video-on-web_20.html
 * <pre>
 duration - Obvious. But unlike for FLV files this field will always be present.
 videocodecid - For H.264 we report 'avc1'.
 audiocodecid - For AAC we report 'mp4a', for MP3 we report '.mp3'.
 avcprofile - 66, 77, 88, 100, 110, 122 or 144 which corresponds to the H.264 profiles.
 avclevel - A number between 10 and 51. Consult this list to find out more.
 aottype - Either 0, 1 or 2. This corresponds to AAC Main, AAC LC and SBR audio types.
 moovposition - The offset in bytes of the moov atom in a file.
 trackinfo - An array of objects containing various information about all the tracks in a file
 ex.
 trackinfo[0].length: 7081
 trackinfo[0].timescale: 600
 trackinfo[0].sampledescription.sampletype: avc1
 trackinfo[0].language: und
 trackinfo[1].length: 525312
 trackinfo[1].timescale: 44100
 trackinfo[1].sampledescription.sampletype: mp4a
 trackinfo[1].language: und
 chapters - As mentioned above information about chapters in audiobooks.
 seekpoints - As mentioned above times you can directly feed into NetStream.seek();
 videoframerate - The frame rate of the video if a monotone frame rate is used.
 Most videos will have a monotone frame rate.
 audiosamplerate - The original sampling rate of the audio track.
 audiochannels - The original number of channels of the audio track.
 tags - As mentioned above ID3 like tag information.
 * </pre>
 * Additional property descriptions:
 * <pre>
 width: Display width in pixels.
 height: Display height in pixels.
 duration: Duration in seconds.
 avcprofile: AVC profile number such as 55, 77, 100 etc.
 avclevel: AVC IDC level number such as 10, 11, 20, 21 etc.
 aacaot: AAC audio object type; 0, 1 or 2 are supported.
 videoframerate: Frame rate of the video in this MP4.
 seekpoints: Array that lists the available keyframes in a file as time stamps in milliseconds.
 This is optional as the MP4 file might not contain this information. Generally speaking,
 most MP4 files will include this by default.
 videocodecid: Usually a string such as "avc1" or "VP6F."
 audiocodecid: Usually a string such as ".mp3" or "mp4a."
 progressivedownloadinfo: Object that provides information from the "pdin" atom. This is optional
 and many files will not have this field.
 trackinfo: Object that provides information on all the tracks in the MP4 file, including their
 sample description ID.
 tags: Array of key value pairs representing the information present in the "ilst" atom, which is
 the equivalent of ID3 tags for MP4 files. These tags are mostly used by iTunes.
 * </pre>
 *
 * @return Metadata event tag
 */
ITag createFileMeta() {
    log.debug("Creating onMetaData");
    // Create tag for onMetaData event
    IoBuffer buf = IoBuffer.allocate(1024);
    buf.setAutoExpand(true);
    Output out = new Output(buf);
    out.writeString("onMetaData");
    Map<Object, Object> props = new HashMap<Object, Object>();
    // Duration property (seconds)
    props.put("duration", ((double) duration / (double) timeScale));
    props.put("width", width);
    props.put("height", height);
    // Video codec id
    props.put("videocodecid", videoCodecId);
    props.put("avcprofile", avcProfile);
    props.put("avclevel", avcLevel);
    props.put("videoframerate", fps);
    // Audio codec id - watch for mp3 instead of aac
    props.put("audiocodecid", audioCodecId);
    props.put("aacaot", audioCodecType);
    props.put("audiosamplerate", audioTimeScale);
    props.put("audiochannels", audioChannels);
    props.put("moovposition", moovOffset);
    //props.put("chapters", ""); //this is for f4b - books
    if (seekPoints != null) {
        props.put("seekpoints", seekPoints);
    }
    //tags will only appear if there is an "ilst" atom in the file
    //props.put("tags", "");
    List<Map<String, Object>> arr = new ArrayList<Map<String, Object>>(2);
    if (hasAudio) {
        Map<String, Object> audioMap = createTrackInfoMap(audioTimeScale, audioCodecId);
        if (audioSamples != null) {
            //multiply as long so very long tracks cannot overflow the int range
            audioMap.put("length_property", (long) audioSampleDuration * audioSamples.size());
            //release some memory, since we're done with the vectors
            audioSamples.clear();
            audioSamples = null;
        }
        arr.add(audioMap);
    }
    if (hasVideo) {
        Map<String, Object> videoMap = createTrackInfoMap(videoTimeScale, videoCodecId);
        if (videoSamples != null) {
            //multiply as long so very long tracks cannot overflow the int range
            videoMap.put("length_property", (long) videoSampleDuration * videoSamples.size());
            //release some memory, since we're done with the vectors
            videoSamples.clear();
            videoSamples = null;
        }
        arr.add(videoMap);
    }
    props.put("trackinfo", arr);
    //set this based on existence of seekpoints
    props.put("canSeekToEnd", (seekPoints != null));
    out.writeMap(props, new Serializer());
    buf.flip();
    //now that all the meta properties are done, update the duration
    // NOTE(review): this multiplies the raw value by 1000 without dividing by the time
    // scale, so the result is only milliseconds when the time scale is 1 - confirm what
    // the users of getDuration() expect before changing it
    duration = Math.round(duration * 1000d);
    ITag result = new Tag(IoConstants.TYPE_METADATA, 0, buf.limit(), null, 0);
    result.setBody(buf);
    return result;
}

/**
 * Builds the trackinfo entry shared by audio and video tracks: time scale, language and sample type.
 */
private Map<String, Object> createTrackInfoMap(double trackTimeScale, String sampleType) {
    Map<String, Object> trackMap = new HashMap<String, Object>(4);
    trackMap.put("timescale", trackTimeScale);
    trackMap.put("language", "und");
    List<Map<String, String>> desc = new ArrayList<Map<String, String>>(1);
    trackMap.put("sampledescription", desc);
    Map<String, String> sampleMap = new HashMap<String, String>(1);
    sampleMap.put("sampletype", sampleType);
    desc.add(sampleMap);
    return trackMap;
}
/**
 * Creates the decoder configuration tags that must be sent before any media data and
 * appends them to the first-tags queue: a video config tag when the file has video,
 * followed by an audio config tag when the file has audio.
 * <p>
 * Tag sequence
 * MetaData, Video config, Audio config, remaining audio and video
 *
 * Packet prefixes:
 * 17 00 00 00 00 = Video extra data (first video packet)
 * 17 01 00 00 00 = Video keyframe
 * 27 01 00 00 00 = Video interframe
 * af 00 ... 06 = Audio extra data (first audio packet)
 * af 01 = Audio frame
 *
 * Audio extra data(s):
 * af 00 = Prefix
 * 11 90 4f 14 = AAC Main = aottype 0
 * 12 10 = AAC LC = aottype 1
 * 13 90 56 e5 a5 48 00 = HE-AAC SBR = aottype 2
 * 06 = Suffix
 *
 * Still not absolutely certain about this order or the bytes - need to verify later
 * </p>
 *
 * @param timestamp timestamp given to the created tags
 * @param clear whether the first-tags queue is emptied before the new tags are added
 */
private void createPreStreamingTags(int timestamp, boolean clear) {
    log.debug("Creating pre-streaming tags");
    if (clear) {
        firstTags.clear();
    }
    ITag videoTag = null;
    if (hasVideo) {
        //video tag #1
        //TODO: this data is only for backcountry bombshells - make this dynamic
        IoBuffer body = IoBuffer.allocate(41);
        body.setAutoExpand(true);
        body.put(PREFIX_VIDEO_CONFIG_FRAME); //prefix
        if (videoDecoderBytes != null) {
            body.put(videoDecoderBytes);
        }
        videoTag = new Tag(IoConstants.TYPE_VIDEO, timestamp, body.position(), null, 0);
        body.flip();
        videoTag.setBody(body);
        //add tag
        firstTags.add(videoTag);
    }
    if (hasAudio) {
        //audio tag #1
        //TODO: this data is only for backcountry bombshells - make this dynamic
        IoBuffer body = IoBuffer.allocate(7);
        body.setAutoExpand(true);
        body.put(new byte[] { (byte) 0xaf, (byte) 0 }); //prefix
        if (audioDecoderBytes != null) {
            body.put(audioDecoderBytes);
        } else {
            //default to aac-lc when the esds doesnt contain descripter bytes
            body.put(AUDIO_CONFIG_FRAME_AAC_LC);
        }
        body.put((byte) 0x06); //suffix
        //audio-only files have no preceding video config tag, so the previous size is 0
        int previousTagSize = (videoTag != null) ? videoTag.getBodySize() : 0;
        ITag audioTag = new Tag(IoConstants.TYPE_AUDIO, timestamp, body.position(), null, previousTagSize);
        body.flip();
        audioTag.setBody(body);
        //add tag
        firstTags.add(audioTag);
    }
}
/**
 * Packages media data for return to providers.
 *
 * Queued pre-streaming tags are returned first, one per call. After that the frame at
 * currentFrame is read: the payload is the audio or video packet prefix followed by
 * the sample bytes read from the channel at the frame's offset, and currentFrame is
 * advanced by one.
 *
 * An I/O error while positioning or reading is logged and the tag is still returned
 * with whatever bytes were read up to that point.
 *
 * @return the next tag
 */
public synchronized ITag readTag() {
    //empty-out the pre-streaming tags first
    if (!firstTags.isEmpty()) {
        // Return first tags before media data
        return firstTags.removeFirst();
    }
    //get the current frame
    MP4Frame frame = frames.get(currentFrame);
    log.debug("Playback #{} {}", currentFrame, frame);
    int sampleSize = frame.getSize();
    // frame time is multiplied by 1000 and rounded to get the tag timestamp
    int time = (int) Math.round(frame.getTime() * 1000.0);
    long samplePos = frame.getOffset();
    //determine frame type and packet body padding
    byte type = frame.getType();
    //assume video type: 2 prefix bytes + 3 time offset bytes
    int pad = 5;
    if (type == TYPE_AUDIO) {
        pad = 2;
    }
    //create a byte buffer of the size of the sample plus its prefix
    ByteBuffer data = ByteBuffer.allocate(sampleSize + pad);
    try {
        //prefix is different for keyframes
        if (type == TYPE_VIDEO) {
            if (frame.isKeyFrame()) {
                data.put(PREFIX_VIDEO_KEYFRAME);
            } else {
                data.put(PREFIX_VIDEO_FRAME);
            }
            // 24 bit big-endian delta to the previously returned video timestamp
            int timeOffset = prevVideoTS != -1 ? time - prevVideoTS : 0;
            data.put((byte) ((timeOffset >>> 16) & 0xff));
            data.put((byte) ((timeOffset >>> 8) & 0xff));
            data.put((byte) (timeOffset & 0xff));
            if (log.isTraceEnabled()) {
                // copy the 5 prefix bytes out for logging, then restore the write position
                byte[] prefix = new byte[5];
                int p = data.position();
                data.position(0);
                data.get(prefix);
                data.position(p);
                log.trace("{}", prefix);
            }
            // track video frame count
            videoCount++;
            prevVideoTS = time;
        } else {
            data.put(PREFIX_AUDIO_FRAME);
            // track audio frame count
            audioCount++;
        }
        //do we need to add the mdat offset to the sample position?
        channel.position(samplePos);
        // a single read is not guaranteed to fill the buffer, so keep reading
        // until the whole sample is in or the end of the file is hit
        while (data.hasRemaining()) {
            if (channel.read(data) < 0) {
                log.warn("End of file reached reading sample at {}, {} bytes missing", samplePos, data.remaining());
                break;
            }
        }
    } catch (IOException e) {
        log.error("Error on channel position / read", e);
    }
    //chunk the data; wrapping the backing array exposes the full buffer from index 0
    IoBuffer payload = IoBuffer.wrap(data.array());
    //create the tag
    ITag tag = new Tag(type, time, payload.limit(), payload, prevFrameSize);
    //increment the frame number
    currentFrame++;
    //set the frame / tag size
    prevFrameSize = tag.getBodySize();
    return tag;
}
/**
 * Performs frame analysis and generates metadata for use in seeking. All the frames
 * are analyzed and sorted together based on time and offset.
 *
 * For each video and audio sample an MP4Frame is created with its file offset, size,
 * time and type, and added to frames. For video samples the sample-to-position map
 * is filled and, when a sync sample table is present, the time-to-position map is
 * filled for every sample while seekPoints receives only the key frame timestamps.
 *
 * Entries of compositionTimes are consumed (removed) while walking the video samples,
 * and the chunk / sync sample tables are cleared and released at the end.
 */
public void analyzeFrames() {
    log.debug("Analyzing frames");
    // Maps positions, samples, timestamps to one another
    timePosMap = new HashMap<Integer, Long>();
    samplePosMap = new HashMap<Integer, Long>();
    // tag == sample (1-based)
    int sample = 1;
    // if audio-only, skip this
    if (videoSamplesToChunks != null) {
        // handle composite times
        int compositeIndex = 0;
        CompositionTimeSampleRecord compositeTimeEntry = null;
        if (compositionTimes != null && !compositionTimes.isEmpty()) {
            compositeTimeEntry = compositionTimes.remove(0);
        }
        for (int i = 0; i < videoSamplesToChunks.size(); i++) {
            MP4Atom.Record record = videoSamplesToChunks.get(i);
            int firstChunk = record.getFirstChunk();
            // a record applies up to the chunk before the next record's first chunk,
            // or to the last chunk when it is the final record
            int lastChunk = videoChunkOffsets.size();
            if (i < videoSamplesToChunks.size() - 1) {
                MP4Atom.Record nextRecord = videoSamplesToChunks.get(i + 1);
                lastChunk = nextRecord.getFirstChunk() - 1;
            }
            for (int chunk = firstChunk; chunk <= lastChunk; chunk++) {
                int sampleCount = record.getSamplesPerChunk();
                // chunk numbers are 1-based; samples are laid out back to back from here
                long pos = videoChunkOffsets.elementAt(chunk - 1);
                while (sampleCount > 0) {
                    samplePosMap.put(sample, pos);
                    //calculate ts
                    double ts = (videoSampleDuration * (sample - 1)) / videoTimeScale;
                    //check to see if the sample is a keyframe
                    boolean keyframe = false;
                    //some files appear not to have sync samples
                    if (syncSamples != null) {
                        keyframe = syncSamples.contains(sample);
                        if (seekPoints == null) {
                            seekPoints = new LinkedList<Integer>();
                        }
                        int frameTs = (int) Math.round(ts * 1000.0);
                        // only sync samples are valid seek targets
                        if (keyframe) {
                            seekPoints.add(frameTs);
                        }
                        timePosMap.put(frameTs, pos);
                    }
                    //size of the sample
                    int size = (videoSamples.get(sample - 1)).intValue();
                    //create a frame
                    MP4Frame frame = new MP4Frame();
                    frame.setKeyFrame(keyframe);
                    frame.setOffset(pos);
                    frame.setSize(size);
                    frame.setTime(ts);
                    frame.setType(TYPE_VIDEO);
                    //set time offset value from composition records
                    if (compositeTimeEntry != null) {
                        // how many samples have this offset
                        int consecutiveSamples = compositeTimeEntry.getConsecutiveSamples();
                        frame.setTimeOffset(compositeTimeEntry.getSampleOffset());
                        // increment our count
                        compositeIndex++;
                        if (compositeIndex - consecutiveSamples == 0) {
                            // ensure there are still times available
                            if (!compositionTimes.isEmpty()) {
                                // get the next one
                                compositeTimeEntry = compositionTimes.remove(0);
                            }
                            // reset
                            compositeIndex = 0;
                        }
                    }
                    // add the frame
                    frames.add(frame);
                    //inc and dec stuff
                    pos += size;
                    sampleCount--;
                    sample++;
                }
            }
        }
        log.debug("Sample position map (video): {}", samplePosMap);
    }
    // if video-only, skip this
    if (audioSamplesToChunks != null) {
        //add the audio frames / samples / chunks; sample numbering restarts for audio
        sample = 1;
        for (int i = 0; i < audioSamplesToChunks.size(); i++) {
            MP4Atom.Record record = audioSamplesToChunks.get(i);
            int firstChunk = record.getFirstChunk();
            int lastChunk = audioChunkOffsets.size();
            if (i < audioSamplesToChunks.size() - 1) {
                MP4Atom.Record nextRecord = audioSamplesToChunks.get(i + 1);
                lastChunk = nextRecord.getFirstChunk() - 1;
            }
            for (int chunk = firstChunk; chunk <= lastChunk; chunk++) {
                int sampleCount = record.getSamplesPerChunk();
                long pos = audioChunkOffsets.elementAt(chunk - 1);
                while (sampleCount > 0) {
                    //calculate ts
                    double ts = (audioSampleDuration * (sample - 1)) / audioTimeScale;
                    //sample size
                    int size = (audioSamples.get(sample - 1)).intValue();
                    //create a frame
                    MP4Frame frame = new MP4Frame();
                    frame.setOffset(pos);
                    frame.setSize(size);
                    frame.setTime(ts);
                    frame.setType(TYPE_AUDIO);
                    frames.add(frame);
                    //inc and dec stuff
                    pos += size;
                    sampleCount--;
                    sample++;
                }
            }
        }
    }
    //sort the frames
    Collections.sort(frames);
    log.debug("Frames count: {}", frames.size());
    //release some memory, since we're done with the vectors
    if (audioSamplesToChunks != null) {
        audioChunkOffsets.clear();
        audioChunkOffsets = null;
        audioSamplesToChunks.clear();
        audioSamplesToChunks = null;
    }
    if (videoSamplesToChunks != null) {
        videoChunkOffsets.clear();
        videoChunkOffsets = null;
        videoSamplesToChunks.clear();
        videoSamplesToChunks = null;
    }
    if (syncSamples != null) {
        syncSamples.clear();
        syncSamples = null;
    }
}
/**
 * Put the current position to pos. The caller must ensure the pos is a valid one.
 *
 * Frames are scanned in order for the first key frame whose offset equals pos or lies
 * beyond it. When one is found the pre-streaming tags are rebuilt with that frame's
 * timestamp and currentFrame is set to its index. If none is found, currentFrame is
 * left unchanged.
 *
 * @param pos position to move to in file / channel
 */
public void position(long pos) {
    log.debug("Position: {}", pos);
    log.debug("Current frame: {}", currentFrame);
    int len = frames.size();
    MP4Frame frame = null;
    for (int f = 0; f < len; f++) {
        frame = frames.get(f);
        long offset = frame.getOffset();
        // round the same way readTag() does, so the timestamps used here and the
        // ones put on the tags cannot differ by a millisecond
        int frameTs = (int) Math.round(frame.getTime() * 1000.0);
        //look for pos to match frame offset or grab the first keyframe
        //beyond the offset
        if (pos == offset || (offset > pos && frame.isKeyFrame())) {
            //ensure that it is a keyframe
            if (!frame.isKeyFrame()) {
                log.debug("Frame #{} was not a key frame, so trying again..", f);
                continue;
            }
            log.info("Frame #{} found for seek: {}", f, frame);
            createPreStreamingTags(frameTs, true);
            currentFrame = f;
            break;
        }
        // remember the timestamp of the last frame passed over before the seek target
        prevVideoTS = frameTs;
    }
    //
    log.debug("Setting current frame: {}", currentFrame);
}
/** {@inheritDoc}
 *
 * Closes the channel and the underlying file input stream and clears the frame list.
 * Nothing is done when there is no channel.
 */
public void close() {
    log.debug("Close");
    if (channel != null) {
        try {
            channel.close();
        } catch (IOException e) {
            log.error("Channel close", e);
        } finally {
            // close the stream even when closing the channel failed
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException e) {
                    log.error("File input stream close", e);
                }
                fis = null;
            }
            if (frames != null) {
                frames.clear();
                frames = null;
            }
        }
    }
}
/**
 * Sets the video codec id held by this reader.
 *
 * @param videoCodecId video codec id to store
 */
public void setVideoCodecId(String videoCodecId) {
this.videoCodecId = videoCodecId;
}
/**
 * Sets the audio codec id held by this reader.
 *
 * @param audioCodecId audio codec id to store
 */
public void setAudioCodecId(String audioCodecId) {
this.audioCodecId = audioCodecId;
}
/**
 * Not implemented by this reader.
 *
 * @return always null
 */
public ITag readTagHeader() {
return null;
}
/**
 * Builds the key frame metadata from the seek points collected during frame analysis.
 *
 * For every seek point timestamp the matching file position is looked up in the
 * time-to-position map. When no seek points were collected the returned position and
 * timestamp arrays are empty.
 *
 * @return key frame metadata with the audio-only flag, duration, positions and timestamps
 */
@Override
public KeyFrameMeta analyzeKeyFrames() {
    KeyFrameMeta result = new KeyFrameMeta();
    result.audioOnly = hasAudio && !hasVideo;
    result.duration = duration;
    // seekPoints is only created while analyzing video samples that have a sync
    // sample table, so it can still be null here
    int count = (seekPoints != null) ? seekPoints.size() : 0;
    result.positions = new long[count];
    result.timestamps = new int[count];
    if (count > 0) {
        // iterate instead of calling get(idx): seekPoints may be a linked list,
        // where indexed access walks the list on every call
        int idx = 0;
        for (Integer ts : seekPoints) {
            result.positions[idx] = timePosMap.get(ts);
            result.timestamps[idx] = ts;
            idx++;
        }
    }
    return result;
}
}