/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.android.exoplayer.parser.webm; import com.google.android.exoplayer.MediaFormat; import com.google.android.exoplayer.SampleHolder; import com.google.android.exoplayer.parser.SegmentIndex; import com.google.android.exoplayer.upstream.NonBlockingInputStream; import com.google.android.exoplayer.util.LongArray; import com.google.android.exoplayer.util.MimeTypes; import android.annotation.TargetApi; import android.media.MediaExtractor; import java.util.Arrays; /** * Facilitates the extraction of data from the WebM container format with a * non-blocking, incremental parser based on {@link EbmlReader}. * * <p>WebM is a subset of the EBML elements defined for Matroska. More information about EBML and * Matroska is available <a href="http://www.matroska.org/technical/specs/index.html">here</a>. * More info about WebM is <a href="http://www.webmproject.org/code/specs/container/">here</a>. 
 */
@TargetApi(16)
public final class WebmExtractor extends EbmlReader {

  private static final String DOC_TYPE_WEBM = "webm";
  private static final String CODEC_ID_VP9 = "V_VP9";
  private static final int UNKNOWN = -1;

  // Element IDs
  private static final int ID_EBML = 0x1A45DFA3;
  private static final int ID_EBML_READ_VERSION = 0x42F7;
  private static final int ID_DOC_TYPE = 0x4282;
  private static final int ID_DOC_TYPE_READ_VERSION = 0x4285;
  private static final int ID_SEGMENT = 0x18538067;
  private static final int ID_INFO = 0x1549A966;
  private static final int ID_TIMECODE_SCALE = 0x2AD7B1;
  private static final int ID_DURATION = 0x4489;
  private static final int ID_CLUSTER = 0x1F43B675;
  private static final int ID_TIME_CODE = 0xE7;
  private static final int ID_SIMPLE_BLOCK = 0xA3;
  private static final int ID_TRACKS = 0x1654AE6B;
  private static final int ID_TRACK_ENTRY = 0xAE;
  private static final int ID_CODEC_ID = 0x86;
  private static final int ID_VIDEO = 0xE0;
  private static final int ID_PIXEL_WIDTH = 0xB0;
  private static final int ID_PIXEL_HEIGHT = 0xBA;
  private static final int ID_CUES = 0x1C53BB6B;
  private static final int ID_CUE_POINT = 0xBB;
  private static final int ID_CUE_TIME = 0xB3;
  private static final int ID_CUE_TRACK_POSITIONS = 0xB7;
  private static final int ID_CUE_CLUSTER_POSITION = 0xF1;

  // SimpleBlock Lacing Values
  private static final int LACING_NONE = 0;
  private static final int LACING_XIPH = 1;
  private static final int LACING_FIXED = 2;
  private static final int LACING_EBML = 3;

  // Scratch buffer for the 3 bytes following a SimpleBlock's track number
  // (2-byte relative timecode + 1 flags/lacing byte).
  private final byte[] simpleBlockTimecodeAndFlags = new byte[3];

  // Holder passed into the current read() call; only valid for the duration of that call.
  private SampleHolder tempSampleHolder;
  // Set by onBinaryElement when a full sample has been read into tempSampleHolder.
  private boolean sampleRead;

  private boolean prepared = false;
  private long segmentStartPosition = UNKNOWN;
  private long segmentEndPosition = UNKNOWN;
  // TimecodeScale defaults to 1,000,000 per the Matroska spec if the element is absent.
  private long timecodeScale = 1000000L;
  private long durationUs = UNKNOWN;
  private int pixelWidth = UNKNOWN;
  private int pixelHeight = UNKNOWN;
  private int cuesByteSize = UNKNOWN;
  private long clusterTimecodeUs = UNKNOWN;
  private long simpleBlockTimecodeUs = UNKNOWN;
  private MediaFormat format;
  private SegmentIndex cues;
  // Accumulators for cue data while parsing; released (nulled) once cues is built.
  private LongArray cueTimesUs;
  private LongArray cueClusterPositions;

  public WebmExtractor() {
    cueTimesUs = new LongArray();
    cueClusterPositions = new LongArray();
  }

  /**
   * Whether the extractor has parsed the cues and sample format from the stream.
   *
   * @return True if the extractor is prepared. False otherwise.
   */
  public boolean isPrepared() {
    return prepared;
  }

  /**
   * Consumes data from a {@link NonBlockingInputStream}.
   *
   * <p>If the return value is {@code false}, then a sample may have been partially read into
   * {@code sampleHolder}. Hence the same {@link SampleHolder} instance must be passed
   * in subsequent calls until the whole sample has been read.
   *
   * @param inputStream The input stream from which data should be read.
   * @param sampleHolder A {@link SampleHolder} into which the sample should be read.
   * @return {@code true} if a sample has been read into the sample holder, otherwise {@code false}.
   */
  public boolean read(NonBlockingInputStream inputStream, SampleHolder sampleHolder) {
    tempSampleHolder = sampleHolder;
    sampleRead = false;
    super.read(inputStream);
    // Clear the reference so a stale holder is never written to by a later callback.
    tempSampleHolder = null;
    return sampleRead;
  }

  /**
   * Seeks to a position before or equal to the requested time.
   *
   * @param seekTimeUs The desired seek time in microseconds.
   * @param allowNoop Allow the seek operation to do nothing if the seek time is in the current
   *     segment, is equal to or greater than the time of the current sample, and if there does not
   *     exist a sync frame between these two times.
   * @return True if the operation resulted in a change of state. False if it was a no-op.
   */
  public boolean seekTo(long seekTimeUs, boolean allowNoop) {
    checkPrepared();
    if (allowNoop
        && simpleBlockTimecodeUs != UNKNOWN
        && seekTimeUs >= simpleBlockTimecodeUs) {
      // NOTE(review): binarySearch only matches when the current cluster timecode exactly equals a
      // cue time; otherwise the no-op check fails and a full seek/reset is performed.
      final int clusterIndex = Arrays.binarySearch(cues.timesUs, clusterTimecodeUs);
      if (clusterIndex >= 0 && seekTimeUs < clusterTimecodeUs + cues.durationsUs[clusterIndex]) {
        return false;
      }
    }
    reset();
    return true;
  }

  /**
   * Returns the cues for the media stream.
   *
   * @return The cues in the form of a {@link SegmentIndex}.
   * @throws IllegalStateException If the extractor is not yet prepared.
   */
  public SegmentIndex getCues() {
    checkPrepared();
    return cues;
  }

  /**
   * Returns the format of the samples contained within the media stream.
   *
   * @return The sample media format.
   * @throws IllegalStateException If the extractor is not yet prepared.
   */
  public MediaFormat getFormat() {
    checkPrepared();
    return format;
  }

  @Override
  protected int getElementType(int id) {
    switch (id) {
      case ID_EBML:
      case ID_SEGMENT:
      case ID_INFO:
      case ID_CLUSTER:
      case ID_TRACKS:
      case ID_TRACK_ENTRY:
      case ID_VIDEO:
      case ID_CUES:
      case ID_CUE_POINT:
      case ID_CUE_TRACK_POSITIONS:
        return EbmlReader.TYPE_MASTER;
      case ID_EBML_READ_VERSION:
      case ID_DOC_TYPE_READ_VERSION:
      case ID_TIMECODE_SCALE:
      case ID_TIME_CODE:
      case ID_PIXEL_WIDTH:
      case ID_PIXEL_HEIGHT:
      case ID_CUE_TIME:
      case ID_CUE_CLUSTER_POSITION:
        return EbmlReader.TYPE_UNSIGNED_INT;
      case ID_DOC_TYPE:
      case ID_CODEC_ID:
        return EbmlReader.TYPE_STRING;
      case ID_SIMPLE_BLOCK:
        return EbmlReader.TYPE_BINARY;
      case ID_DURATION:
        return EbmlReader.TYPE_FLOAT;
      default:
        return EbmlReader.TYPE_UNKNOWN;
    }
  }

  @Override
  protected boolean onMasterElementStart(
      int id, long elementOffset, int headerSize, int contentsSize) {
    switch (id) {
      case ID_SEGMENT:
        if (segmentStartPosition != UNKNOWN || segmentEndPosition != UNKNOWN) {
          throw new IllegalStateException("Multiple Segment elements not supported");
        }
        segmentStartPosition = elementOffset + headerSize;
        segmentEndPosition = elementOffset + headerSize + contentsSize;
        break;
      case ID_CUES:
        cuesByteSize = headerSize + contentsSize;
        break;
    }
    return true;
  }

  @Override
  protected boolean onMasterElementEnd(int id) {
    switch (id) {
      case ID_CUES:
        finishPreparing();
        // Stop parsing so the caller regains control once preparation completes.
        return false;
    }
    return true;
  }

  @Override
  protected boolean onIntegerElement(int id, long value) {
    switch (id) {
      case ID_EBML_READ_VERSION:
        // Validate that EBMLReadVersion is supported. This extractor only supports v1.
        if (value != 1) {
          throw new IllegalStateException("EBMLReadVersion " + value + " not supported");
        }
        break;
      case ID_DOC_TYPE_READ_VERSION:
        // Validate that DocTypeReadVersion is supported. This extractor only supports up to v2.
        if (value < 1 || value > 2) {
          throw new IllegalStateException("DocTypeReadVersion " + value + " not supported");
        }
        break;
      case ID_TIMECODE_SCALE:
        timecodeScale = value;
        break;
      case ID_PIXEL_WIDTH:
        pixelWidth = (int) value;
        break;
      case ID_PIXEL_HEIGHT:
        pixelHeight = (int) value;
        break;
      case ID_CUE_TIME:
        cueTimesUs.add(scaleTimecodeToUs(value));
        break;
      case ID_CUE_CLUSTER_POSITION:
        cueClusterPositions.add(value);
        break;
      case ID_TIME_CODE:
        clusterTimecodeUs = scaleTimecodeToUs(value);
        break;
    }
    return true;
  }

  @Override
  protected boolean onFloatElement(int id, double value) {
    switch (id) {
      case ID_DURATION:
        durationUs = scaleTimecodeToUs(value);
        break;
    }
    return true;
  }

  @Override
  protected boolean onStringElement(int id, String value) {
    switch (id) {
      case ID_DOC_TYPE:
        // Validate that DocType is supported. This extractor only supports "webm".
        if (!DOC_TYPE_WEBM.equals(value)) {
          throw new IllegalStateException("DocType " + value + " not supported");
        }
        break;
      case ID_CODEC_ID:
        // Validate that CodecID is supported. This extractor only supports "V_VP9".
        if (!CODEC_ID_VP9.equals(value)) {
          throw new IllegalStateException("CodecID " + value + " not supported");
        }
        break;
    }
    return true;
  }

  @Override
  protected boolean onBinaryElement(
      NonBlockingInputStream inputStream, int id, long elementOffset, int headerSize,
      int contentsSize) {
    switch (id) {
      case ID_SIMPLE_BLOCK:
        // Please refer to http://www.matroska.org/technical/specs/index.html#simpleblock_structure
        // for info about how data is organized in a SimpleBlock element.
        // Value of trackNumber is not used but needs to be read.
        readVarint(inputStream);
        // Next three bytes have timecode and flags.
        readBytes(inputStream, simpleBlockTimecodeAndFlags, 3);
        // First two bytes of the three are the relative timecode. Note that the left operand of
        // the OR is a signed byte promoted to int, so negative relative timecodes are preserved.
        final int timecode =
            (simpleBlockTimecodeAndFlags[0] << 8) | (simpleBlockTimecodeAndFlags[1] & 0xff);
        final long timecodeUs = scaleTimecodeToUs(timecode);
        // Last byte of the three has some flags and the lacing value.
        final boolean keyframe = (simpleBlockTimecodeAndFlags[2] & 0x80) == 0x80;
        final boolean invisible = (simpleBlockTimecodeAndFlags[2] & 0x08) == 0x08;
        final int lacing = (simpleBlockTimecodeAndFlags[2] & 0x06) >> 1;
        //final boolean discardable = (simpleBlockTimecodeAndFlags[2] & 0x01) == 0x01; // Not used.
        // Validate lacing and set info into sample holder.
        switch (lacing) {
          case LACING_NONE:
            final long elementEndOffset = elementOffset + headerSize + contentsSize;
            simpleBlockTimecodeUs = clusterTimecodeUs + timecodeUs;
            tempSampleHolder.flags = keyframe ? MediaExtractor.SAMPLE_FLAG_SYNC : 0;
            tempSampleHolder.decodeOnly = invisible;
            tempSampleHolder.timeUs = clusterTimecodeUs + timecodeUs;
            // Remaining bytes of the element are the frame payload.
            tempSampleHolder.size = (int) (elementEndOffset - getBytesRead());
            break;
          case LACING_EBML:
          case LACING_FIXED:
          case LACING_XIPH:
          default:
            throw new IllegalStateException("Lacing mode " + lacing + " not supported");
        }
        // Read video data into sample holder.
        readBytes(inputStream, tempSampleHolder.data, tempSampleHolder.size);
        sampleRead = true;
        return false;
      default:
        skipBytes(inputStream, contentsSize);
    }
    return true;
  }

  // Converts a Matroska timecode (in TimecodeScale units, nanoseconds per unit) to microseconds.
  private long scaleTimecodeToUs(long unscaledTimecode) {
    return (unscaledTimecode * timecodeScale) / 1000L;
  }

  // Double overload used for the float-valued Duration element.
  private long scaleTimecodeToUs(double unscaledTimecode) {
    return (long) ((unscaledTimecode * timecodeScale) / 1000.0);
  }

  // Guards accessors that are only meaningful after finishPreparing() has run.
  private void checkPrepared() {
    if (!prepared) {
      throw new IllegalStateException("Parser not yet prepared");
    }
  }

  /**
   * Builds {@link #format} and {@link #cues} from the accumulated header and cue data, then marks
   * the extractor as prepared. Called once, when the end of the Cues element is reached.
   *
   * @throws IllegalStateException If any required header value is missing or inconsistent.
   */
  private void finishPreparing() {
    if (prepared || segmentStartPosition == UNKNOWN || segmentEndPosition == UNKNOWN
        || durationUs == UNKNOWN || pixelWidth == UNKNOWN || pixelHeight == UNKNOWN
        || cuesByteSize == UNKNOWN || cueTimesUs.size() == 0
        || cueTimesUs.size() != cueClusterPositions.size()) {
      throw new IllegalStateException("Incorrect state in finishPreparing()");
    }
    format = MediaFormat.createVideoFormat(
        MimeTypes.VIDEO_VP9, MediaFormat.NO_VALUE, pixelWidth, pixelHeight, null);
    final int cuePointsSize = cueTimesUs.size();
    final int sizeBytes = cuesByteSize;
    final int[] sizes = new int[cuePointsSize];
    final long[] offsets = new long[cuePointsSize];
    final long[] durationsUs = new long[cuePointsSize];
    final long[] timesUs = new long[cuePointsSize];
    // Cue cluster positions are relative to the start of the Segment's contents.
    for (int i = 0; i < cuePointsSize; i++) {
      timesUs[i] = cueTimesUs.get(i);
      offsets[i] = segmentStartPosition + cueClusterPositions.get(i);
    }
    // Each entry's size/duration is derived from the distance to the next entry; the last entry
    // extends to the end of the Segment and the end of the stream's duration.
    for (int i = 0; i < cuePointsSize - 1; i++) {
      sizes[i] = (int) (offsets[i + 1] - offsets[i]);
      durationsUs[i] = timesUs[i + 1] - timesUs[i];
    }
    sizes[cuePointsSize - 1] = (int) (segmentEndPosition - offsets[cuePointsSize - 1]);
    durationsUs[cuePointsSize - 1] = durationUs - timesUs[cuePointsSize - 1];
    cues = new SegmentIndex(sizeBytes, sizes, offsets, durationsUs, timesUs);
    // Release the accumulators; they are no longer needed once cues is built.
    cueTimesUs = null;
    cueClusterPositions = null;
    prepared = true;
  }

}