package org.mp4parser.muxer.tracks.h264; import org.mp4parser.boxes.iso14496.part12.CompositionTimeToSample; import org.mp4parser.boxes.iso14496.part12.SampleDependencyTypeBox; import org.mp4parser.boxes.iso14496.part12.SampleDescriptionBox; import org.mp4parser.boxes.iso14496.part15.AvcConfigurationBox; import org.mp4parser.boxes.sampleentry.VisualSampleEntry; import org.mp4parser.muxer.DataSource; import org.mp4parser.muxer.FileDataSourceImpl; import org.mp4parser.muxer.Sample; import org.mp4parser.muxer.tracks.AbstractH26XTrack; import org.mp4parser.muxer.tracks.h264.parsing.model.PictureParameterSet; import org.mp4parser.muxer.tracks.h264.parsing.model.SeqParameterSet; import org.mp4parser.tools.Mp4Arrays; import org.mp4parser.tools.RangeStartMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.util.*; /** * The <code>H264TrackImpl</code> creates a <code>Track</code> from an H.264 * Annex B file. */ public class H264TrackImpl extends AbstractH26XTrack { private static Logger LOG = LoggerFactory.getLogger(H264TrackImpl.class.getName()); Map<Integer, ByteBuffer> spsIdToSpsBytes = new HashMap<Integer, ByteBuffer>(); Map<Integer, SeqParameterSet> spsIdToSps = new HashMap<Integer, SeqParameterSet>(); Map<Integer, ByteBuffer> ppsIdToPpsBytes = new HashMap<Integer, ByteBuffer>(); Map<Integer, PictureParameterSet> ppsIdToPps = new HashMap<Integer, PictureParameterSet>(); SampleDescriptionBox sampleDescriptionBox; SeqParameterSet firstSeqParameterSet = null; PictureParameterSet firstPictureParameterSet = null; SeqParameterSet currentSeqParameterSet = null; PictureParameterSet currentPictureParameterSet = null; RangeStartMap<Integer, ByteBuffer> seqParameterRangeMap = new RangeStartMap<Integer, ByteBuffer>(); RangeStartMap<Integer, ByteBuffer> pictureParameterRangeMap = new RangeStartMap<Integer, ByteBuffer>(); int frameNrInGop = 0; int[] pictureOrderCounts = new int[0]; int prevPicOrderCntLsb = 0; int prevPicOrderCntMsb = 0; long psize = 0; long pcount = 0; long bsize = 0; long bcount = 0; long isize = 0; long icount = 0; private List<Sample> samples; private int width; private int height; private long timescale; private int frametick; private SEIMessage seiMessage; private boolean determineFrameRate = true; private String lang = "eng"; /** * Creates a new <code>Track</code> object from a raw H264 source (<code>DataSource dataSource1</code>). * Whenever the timescale and frametick are set to negative value (e.g. -1) the H264TrackImpl * tries to detect the frame rate. * Typically values for <code>timescale</code> and <code>frametick</code> are: * <ul> * <li>23.976 FPS: timescale = 24000; frametick = 1001</li> * <li>25 FPS: timescale = 25; frametick = 1</li> * <li>29.97 FPS: timescale = 30000; frametick = 1001</li> * <li>30 FPS: timescale = 30; frametick = 1</li> * </ul> * * @param dataSource the source file of the H264 samples * @param lang language of the movie (in doubt: use "eng") * @param timescale number of time units (ticks) in one second * @param frametick number of time units (ticks) that pass while showing exactly one frame * @throws IOException in case of problems whiel reading from the <code>DataSource</code> */ public H264TrackImpl(DataSource dataSource, String lang, long timescale, int frametick) throws IOException { super(dataSource); this.lang = lang; this.timescale = timescale; //e.g. 23976 this.frametick = frametick; if ((timescale > 0) && (frametick > 0)) { this.determineFrameRate = false; } parse(new LookAhead(dataSource)); } public H264TrackImpl(DataSource dataSource, String lang) throws IOException { this(dataSource, lang, -1, -1); } public H264TrackImpl(DataSource dataSource) throws IOException { this(dataSource, "eng"); } public static void main(String[] args) throws IOException { new H264TrackImpl(new FileDataSourceImpl("C:\\dev\\mp4parser\\tos.264")); } public static H264NalUnitHeader getNalUnitHeader(ByteBuffer nal) { H264NalUnitHeader nalUnitHeader = new H264NalUnitHeader(); int type = nal.get(0); nalUnitHeader.nal_ref_idc = (type >> 5) & 3; nalUnitHeader.nal_unit_type = type & 0x1f; return nalUnitHeader; } private void parse(LookAhead la) throws IOException { samples = new ArrayList<Sample>(); if (!readSamples(la)) { throw new IOException(); } /* System.err.println("psize: " + psize + "(" + pcount + ")"); System.err.println("bsize: " + bsize + "(" + bcount + ")"); System.err.println("isize: " + isize + "(" + icount + ")");*/ if (!readVariables()) { throw new IOException(); } sampleDescriptionBox = new SampleDescriptionBox(); VisualSampleEntry visualSampleEntry = new VisualSampleEntry("avc1"); visualSampleEntry.setDataReferenceIndex(1); visualSampleEntry.setDepth(24); visualSampleEntry.setFrameCount(1); visualSampleEntry.setHorizresolution(72); visualSampleEntry.setVertresolution(72); visualSampleEntry.setWidth(width); visualSampleEntry.setHeight(height); visualSampleEntry.setCompressorname("AVC Coding"); AvcConfigurationBox avcConfigurationBox = new AvcConfigurationBox(); avcConfigurationBox.setSequenceParameterSets(new ArrayList<ByteBuffer>(spsIdToSpsBytes.values())); avcConfigurationBox.setPictureParameterSets(new ArrayList<ByteBuffer>(ppsIdToPpsBytes.values())); avcConfigurationBox.setAvcLevelIndication(firstSeqParameterSet.level_idc); avcConfigurationBox.setAvcProfileIndication(firstSeqParameterSet.profile_idc); avcConfigurationBox.setBitDepthLumaMinus8(firstSeqParameterSet.bit_depth_luma_minus8); avcConfigurationBox.setBitDepthChromaMinus8(firstSeqParameterSet.bit_depth_chroma_minus8); avcConfigurationBox.setChromaFormat(firstSeqParameterSet.chroma_format_idc.getId()); avcConfigurationBox.setConfigurationVersion(1); avcConfigurationBox.setLengthSizeMinusOne(3); avcConfigurationBox.setProfileCompatibility( (firstSeqParameterSet.constraint_set_0_flag ? 128 : 0) + (firstSeqParameterSet.constraint_set_1_flag ? 64 : 0) + (firstSeqParameterSet.constraint_set_2_flag ? 32 : 0) + (firstSeqParameterSet.constraint_set_3_flag ? 16 : 0) + (firstSeqParameterSet.constraint_set_4_flag ? 8 : 0) + (int) (firstSeqParameterSet.reserved_zero_2bits & 0x3) ); visualSampleEntry.addBox(avcConfigurationBox); sampleDescriptionBox.addBox(visualSampleEntry); trackMetaData.setCreationTime(new Date()); trackMetaData.setModificationTime(new Date()); trackMetaData.setLanguage(lang); trackMetaData.setTimescale(timescale); trackMetaData.setWidth(width); trackMetaData.setHeight(height); } public SampleDescriptionBox getSampleDescriptionBox() { return sampleDescriptionBox; } public String getHandler() { return "vide"; } public List<Sample> getSamples() { return samples; } private boolean readVariables() { width = (firstSeqParameterSet.pic_width_in_mbs_minus1 + 1) * 16; int mult = 2; if (firstSeqParameterSet.frame_mbs_only_flag) { mult = 1; } height = 16 * (firstSeqParameterSet.pic_height_in_map_units_minus1 + 1) * mult; if (firstSeqParameterSet.frame_cropping_flag) { int chromaArrayType = 0; if (!firstSeqParameterSet.residual_color_transform_flag) { chromaArrayType = firstSeqParameterSet.chroma_format_idc.getId(); } int cropUnitX = 1; int cropUnitY = mult; if (chromaArrayType != 0) { cropUnitX = firstSeqParameterSet.chroma_format_idc.getSubWidth(); cropUnitY = firstSeqParameterSet.chroma_format_idc.getSubHeight() * mult; } width -= cropUnitX * (firstSeqParameterSet.frame_crop_left_offset + firstSeqParameterSet.frame_crop_right_offset); height -= cropUnitY * (firstSeqParameterSet.frame_crop_top_offset + firstSeqParameterSet.frame_crop_bottom_offset); } return true; } private boolean readSamples(LookAhead la) throws IOException { List<ByteBuffer> buffered = new ArrayList<ByteBuffer>(); ByteBuffer nal; class FirstVclNalDetector { int frame_num; int pic_parameter_set_id; boolean field_pic_flag; boolean bottom_field_flag; int nal_ref_idc; int pic_order_cnt_type; int delta_pic_order_cnt_bottom; int pic_order_cnt_lsb; int delta_pic_order_cnt_0; int delta_pic_order_cnt_1; boolean idrPicFlag; int idr_pic_id; public FirstVclNalDetector(ByteBuffer nal, int nal_ref_idc, int nal_unit_type) { InputStream bs = cleanBuffer(new ByteBufferBackedInputStream(nal)); SliceHeader sh = new SliceHeader(bs, spsIdToSps, ppsIdToPps, nal_unit_type == 5); this.frame_num = sh.frame_num; this.pic_parameter_set_id = sh.pic_parameter_set_id; this.field_pic_flag = sh.field_pic_flag; this.bottom_field_flag = sh.bottom_field_flag; this.nal_ref_idc = nal_ref_idc; this.pic_order_cnt_type = spsIdToSps.get(ppsIdToPps.get(sh.pic_parameter_set_id).seq_parameter_set_id).pic_order_cnt_type; this.delta_pic_order_cnt_bottom = sh.delta_pic_order_cnt_bottom; this.pic_order_cnt_lsb = sh.pic_order_cnt_lsb; this.delta_pic_order_cnt_0 = sh.delta_pic_order_cnt_0; this.delta_pic_order_cnt_1 = sh.delta_pic_order_cnt_1; this.idr_pic_id = sh.idr_pic_id; } boolean isFirstInNew(FirstVclNalDetector nu) { if (nu.frame_num != frame_num) { return true; } if (nu.pic_parameter_set_id != pic_parameter_set_id) { return true; } if (nu.field_pic_flag != field_pic_flag) { return true; } if (nu.field_pic_flag) { if (nu.bottom_field_flag != bottom_field_flag) { return true; } } if (nu.nal_ref_idc != nal_ref_idc) { return true; } if (nu.pic_order_cnt_type == 0 && pic_order_cnt_type == 0) { if (nu.pic_order_cnt_lsb != pic_order_cnt_lsb) { return true; } if (nu.delta_pic_order_cnt_bottom != delta_pic_order_cnt_bottom) { return true; } } if (nu.pic_order_cnt_type == 1 && pic_order_cnt_type == 1) { if (nu.delta_pic_order_cnt_0 != delta_pic_order_cnt_0) { return true; } if (nu.delta_pic_order_cnt_1 != delta_pic_order_cnt_1) { return true; } } if (nu.idrPicFlag != idrPicFlag) { return true; } if (nu.idrPicFlag && idrPicFlag) { if (nu.idr_pic_id != idr_pic_id) { return true; } } return false; } } FirstVclNalDetector fvnd = null; nal_loop: while ((nal = findNextNal(la)) != null) { H264NalUnitHeader nalUnitHeader = getNalUnitHeader(nal); switch (nalUnitHeader.nal_unit_type) { case H264NalUnitTypes.CODED_SLICE_NON_IDR: case H264NalUnitTypes.CODED_SLICE_DATA_PART_A: case H264NalUnitTypes.CODED_SLICE_DATA_PART_B: case H264NalUnitTypes.CODED_SLICE_DATA_PART_C: case H264NalUnitTypes.CODED_SLICE_IDR: FirstVclNalDetector current = new FirstVclNalDetector(nal, nalUnitHeader.nal_ref_idc, nalUnitHeader.nal_unit_type); if (fvnd != null && fvnd.isFirstInNew(current)) { LOG.debug("Wrapping up cause of first vcl nal is found"); createSample(buffered); } fvnd = current; //System.err.println("" + nalUnitHeader.nal_unit_type); buffered.add((ByteBuffer) nal.rewind()); //log.finer("NAL Unit Type: " + nalUnitHeader.nal_unit_type + " " + fvnd.frame_num); break; case H264NalUnitTypes.SEI: if (fvnd != null) { LOG.debug("Wrapping up cause of SEI after vcl marks new sample"); createSample(buffered); fvnd = null; } seiMessage = new SEIMessage(cleanBuffer(new ByteBufferBackedInputStream(nal)), currentSeqParameterSet); //System.err.println("" + nalUnitHeader.nal_unit_type); buffered.add(nal); break; case H264NalUnitTypes.AU_UNIT_DELIMITER: if (fvnd != null) { LOG.debug("Wrapping up cause of AU after vcl marks new sample"); createSample(buffered); fvnd = null; } //System.err.println("" + nalUnitHeader.nal_unit_type); buffered.add(nal); break; case H264NalUnitTypes.SEQ_PARAMETER_SET: if (fvnd != null) { LOG.debug("Wrapping up cause of SPS after vcl marks new sample"); createSample(buffered); fvnd = null; } handleSPS((ByteBuffer) nal.rewind()); break; case 8: if (fvnd != null) { LOG.debug("Wrapping up cause of PPS after vcl marks new sample"); createSample(buffered); fvnd = null; } handlePPS((ByteBuffer) nal.rewind()); break; case H264NalUnitTypes.END_OF_SEQUENCE: case H264NalUnitTypes.END_OF_STREAM: break nal_loop; case H264NalUnitTypes.SEQ_PARAMETER_SET_EXT: throw new RuntimeException("Sequence parameter set extension is not yet handled. Needs TLC."); default: // buffered.add(nal); LOG.warn("Unknown NAL unit type: " + nalUnitHeader.nal_unit_type); } } if (buffered.size() > 0) { createSample(buffered); } calcCtts(); decodingTimes = new long[samples.size()]; Arrays.fill(decodingTimes, frametick); return true; } public void calcCtts() { int pTime = 0; int lastPoc = -1; for (int j = 0; j < pictureOrderCounts.length; j++) { int minIndex = 0; int minValue = Integer.MAX_VALUE; for (int i = Math.max(0, j - 128); i < Math.min(pictureOrderCounts.length, j + 128); i++) { if (pictureOrderCounts[i] > lastPoc && pictureOrderCounts[i] < minValue) { minIndex = i; minValue = pictureOrderCounts[i]; } } lastPoc = pictureOrderCounts[minIndex]; pictureOrderCounts[minIndex] = pTime++; } for (int i = 0; i < pictureOrderCounts.length; i++) { ctts.add(new CompositionTimeToSample.Entry(1, pictureOrderCounts[i] - i)); } pictureOrderCounts = new int[0]; } long getSize(List<ByteBuffer> buffered) { long i = 0; for (ByteBuffer byteBuffer : buffered) { i += byteBuffer.remaining(); } return i; } private void createSample(List<ByteBuffer> buffered) throws IOException { SampleDependencyTypeBox.Entry sampleDependency = new SampleDependencyTypeBox.Entry(0); boolean IdrPicFlag = false; H264NalUnitHeader nu = null; ByteBuffer slice = null; for (ByteBuffer nal : buffered) { H264NalUnitHeader _nu = getNalUnitHeader(nal); switch (_nu.nal_unit_type) { case H264NalUnitTypes.CODED_SLICE_IDR: IdrPicFlag = true; case H264NalUnitTypes.CODED_SLICE_NON_IDR: case H264NalUnitTypes.CODED_SLICE_DATA_PART_A: case H264NalUnitTypes.CODED_SLICE_DATA_PART_B: case H264NalUnitTypes.CODED_SLICE_DATA_PART_C: nu = _nu; slice = nal; } } if (nu == null) { LOG.warn("Sample without Slice"); return; } assert slice != null; if (IdrPicFlag) { calcCtts(); } // cleans the buffer we just added InputStream bs = cleanBuffer(new ByteBufferBackedInputStream(slice)); SliceHeader sh = new SliceHeader(bs, spsIdToSps, ppsIdToPps, IdrPicFlag); if ((sh.slice_type == SliceHeader.SliceType.I) || (sh.slice_type == SliceHeader.SliceType.SI)) { isize += getSize(buffered); icount++; } else if ((sh.slice_type == SliceHeader.SliceType.P) || (sh.slice_type == SliceHeader.SliceType.SP)) { psize += getSize(buffered); pcount++; } else if ((sh.slice_type == SliceHeader.SliceType.B)) { bsize += getSize(buffered); bcount++; } else { throw new RuntimeException("_sdjlfd"); } if (nu.nal_ref_idc == 0) { sampleDependency.setSampleIsDependedOn(2); } else { sampleDependency.setSampleIsDependedOn(1); } if ((sh.slice_type == SliceHeader.SliceType.I) || (sh.slice_type == SliceHeader.SliceType.SI)) { sampleDependency.setSampleDependsOn(2); } else { sampleDependency.setSampleDependsOn(1); } Sample bb = createSampleObject(buffered); // LOG.fine("Adding sample with size " + bb.capacity() + " and header " + sh); buffered.clear(); if (seiMessage == null || seiMessage.n_frames == 0) { frameNrInGop = 0; } if (sh.sps.pic_order_cnt_type == 0) { int max_pic_order_count_lsb = (1 << (sh.sps.log2_max_pic_order_cnt_lsb_minus4 + 4)); // System.out.print(" pic_order_cnt_lsb " + pic_order_cnt_lsb + " " + max_pic_order_count); int picOrderCountLsb = sh.pic_order_cnt_lsb; int picOrderCntMsb; if ((picOrderCountLsb < prevPicOrderCntLsb) && ((prevPicOrderCntLsb - picOrderCountLsb) >= (max_pic_order_count_lsb / 2))) { picOrderCntMsb = prevPicOrderCntMsb + max_pic_order_count_lsb; } else if ((picOrderCountLsb > prevPicOrderCntLsb) && ((picOrderCountLsb - prevPicOrderCntLsb) > (max_pic_order_count_lsb / 2))) { picOrderCntMsb = prevPicOrderCntMsb - max_pic_order_count_lsb; } else { picOrderCntMsb = prevPicOrderCntMsb; } pictureOrderCounts = Mp4Arrays.copyOfAndAppend(pictureOrderCounts, picOrderCntMsb + picOrderCountLsb); prevPicOrderCntLsb = picOrderCountLsb; prevPicOrderCntMsb = picOrderCntMsb; } else if (sh.sps.pic_order_cnt_type == 1) { /*if (seiMessage != null && seiMessage.clock_timestamp_flag) { offset = seiMessage.n_frames - frameNrInGop; } else if (seiMessage != null && seiMessage.removal_delay_flag) { offset = seiMessage.dpb_removal_delay / 2; } if (seiMessage == null) { LOG.warning("CTS timing in ctts box is most likely not OK"); }*/ throw new RuntimeException("pic_order_cnt_type == 1 needs to be implemented"); } else if (sh.sps.pic_order_cnt_type == 2) { pictureOrderCounts = Mp4Arrays.copyOfAndAppend(pictureOrderCounts, samples.size()); } sdtp.add(sampleDependency); frameNrInGop++; samples.add(bb); if (IdrPicFlag) { // IDR Picture stss.add(samples.size()); } } private void handlePPS(ByteBuffer data) throws IOException { InputStream is = new ByteBufferBackedInputStream(data); is.read(); PictureParameterSet _pictureParameterSet = PictureParameterSet.read(is); if (firstPictureParameterSet == null) { firstPictureParameterSet = _pictureParameterSet; } currentPictureParameterSet = _pictureParameterSet; ByteBuffer oldPpsSameId = ppsIdToPpsBytes.get(_pictureParameterSet.pic_parameter_set_id); data.rewind(); if (oldPpsSameId != null && !oldPpsSameId.equals(data)) { throw new RuntimeException("OMG - I got two SPS with same ID but different settings! (AVC3 is the solution)"); } else { if (oldPpsSameId == null) { pictureParameterRangeMap.put(samples.size(), data); } ppsIdToPpsBytes.put(_pictureParameterSet.pic_parameter_set_id, data); ppsIdToPps.put(_pictureParameterSet.pic_parameter_set_id, _pictureParameterSet); } } private void handleSPS(ByteBuffer data) throws IOException { InputStream spsInputStream = cleanBuffer(new ByteBufferBackedInputStream(data)); spsInputStream.read(); SeqParameterSet _seqParameterSet = SeqParameterSet.read(spsInputStream); if (firstSeqParameterSet == null) { firstSeqParameterSet = _seqParameterSet; configureFramerate(); } currentSeqParameterSet = _seqParameterSet; data.rewind(); ByteBuffer oldSpsSameId = spsIdToSpsBytes.get(_seqParameterSet.seq_parameter_set_id); if (oldSpsSameId != null && !oldSpsSameId.equals(data)) { throw new RuntimeException("OMG - I got two SPS with same ID but different settings!"); } else { if (oldSpsSameId != null) { seqParameterRangeMap.put(samples.size(), data); } spsIdToSpsBytes.put(_seqParameterSet.seq_parameter_set_id, data); spsIdToSps.put(_seqParameterSet.seq_parameter_set_id, _seqParameterSet); } } private void configureFramerate() { if (determineFrameRate) { if (firstSeqParameterSet.vuiParams != null) { timescale = firstSeqParameterSet.vuiParams.time_scale >> 1; // Not sure why, but I found this in several places, and it works... frametick = firstSeqParameterSet.vuiParams.num_units_in_tick; if (timescale == 0 || frametick == 0) { LOG.warn("vuiParams contain invalid values: time_scale: " + timescale + " and frame_tick: " + frametick + ". Setting frame rate to 25fps"); timescale = 90000; frametick = 3600; } if (timescale / frametick > 100) { LOG.warn("Framerate is " + (timescale / frametick) + ". That is suspicious."); } } else { LOG.warn("Can't determine frame rate. Guessing 25 fps"); timescale = 90000; frametick = 3600; } } } public class ByteBufferBackedInputStream extends InputStream { private final ByteBuffer buf; public ByteBufferBackedInputStream(ByteBuffer buf) { // make a coy of the buffer this.buf = buf.duplicate(); } public int read() throws IOException { if (!buf.hasRemaining()) { return -1; } return buf.get() & 0xFF; } public int read(byte[] bytes, int off, int len) throws IOException { if (!buf.hasRemaining()) { return -1; } len = Math.min(len, buf.remaining()); buf.get(bytes, off, len); return len; } } }