/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.android.exoplayer.text.ttml; import com.google.android.exoplayer.text.Subtitle; import com.google.android.exoplayer.text.SubtitleParser; import com.google.android.exoplayer.util.MimeTypes; import android.util.Log; import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; import org.xmlpull.v1.XmlPullParserFactory; import java.io.IOException; import java.io.InputStream; import java.util.LinkedList; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * A simple TTML parser that supports DFXP presentation profile. * <p> * Supported features in this parser are: * <ul> * <li>content * <li>core * <li>presentation * <li>profile * <li>structure * <li>time-offset * <li>timing * <li>tickRate * <li>time-clock-with-frames * <li>time-clock * <li>time-offset-with-frames * <li>time-offset-with-ticks * </ul> * </p> * @see <a href="http://www.w3.org/TR/ttaf1-dfxp/">TTML specification</a> */ public class TtmlParser implements SubtitleParser { private static final String TAG = "TtmlParser"; private static final String ATTR_BEGIN = "begin"; private static final String ATTR_DURATION = "dur"; private static final String ATTR_END = "end"; private static final Pattern CLOCK_TIME = Pattern.compile("^([0-9][0-9]+):([0-9][0-9]):([0-9][0-9])" + "(?:(\\.[0-9]+)|:([0-9][0-9])(?:\\.([0-9]+))?)?$"); private static final Pattern OFFSET_TIME = Pattern.compile("^([0-9]+(?:\\.[0-9]+)?)(h|m|s|ms|f|t)$"); // TODO: read and apply the following attributes if specified. private static final int DEFAULT_FRAMERATE = 30; private static final int DEFAULT_SUBFRAMERATE = 1; private static final int DEFAULT_TICKRATE = 1; private final XmlPullParserFactory xmlParserFactory; public TtmlParser() { try { xmlParserFactory = XmlPullParserFactory.newInstance(); } catch (XmlPullParserException e) { throw new RuntimeException("Couldn't create XmlPullParserFactory instance", e); } } @Override public Subtitle parse(InputStream inputStream, String inputEncoding, long startTimeUs) throws IOException { try { XmlPullParser xmlParser = xmlParserFactory.newPullParser(); xmlParser.setInput(inputStream, inputEncoding); TtmlSubtitle ttmlSubtitle = null; LinkedList<TtmlNode> nodeStack = new LinkedList<TtmlNode>(); int unsupportedTagDepth = 0; int eventType = xmlParser.getEventType(); while (eventType != XmlPullParser.END_DOCUMENT) { TtmlNode parent = nodeStack.peekLast(); if (unsupportedTagDepth == 0) { String name = xmlParser.getName(); if (eventType == XmlPullParser.START_TAG) { if (!isSupportedTag(name)) { Log.w(TAG, "Ignoring unsupported tag: " + xmlParser.getName()); unsupportedTagDepth++; } else { TtmlNode node = parseNode(xmlParser, parent); nodeStack.addLast(node); if (parent != null) { parent.addChild(node); } } } else if (eventType == XmlPullParser.TEXT) { parent.addChild(TtmlNode.buildTextNode(xmlParser.getText())); } else if (eventType == XmlPullParser.END_TAG) { if (xmlParser.getName().equals(TtmlNode.TAG_TT)) { ttmlSubtitle = new TtmlSubtitle(nodeStack.getLast(), startTimeUs); } nodeStack.removeLast(); } } else { if (eventType == XmlPullParser.START_TAG) { unsupportedTagDepth++; } else if (eventType == XmlPullParser.END_TAG) { unsupportedTagDepth--; } } xmlParser.next(); eventType = xmlParser.getEventType(); } return ttmlSubtitle; } catch (XmlPullParserException xppe) { throw new IOException("Unable to parse source", xppe); } } @Override public boolean canParse(String mimeType) { return MimeTypes.APPLICATION_TTML.equals(mimeType); } private TtmlNode parseNode(XmlPullParser parser, TtmlNode parent) { long duration = 0; long startTime = TtmlNode.UNDEFINED_TIME; long endTime = TtmlNode.UNDEFINED_TIME; int attributeCount = parser.getAttributeCount(); for (int i = 0; i < attributeCount; i++) { // TODO: check if it's safe to ignore the namespace of attributes as follows. String attr = parser.getAttributeName(i).replaceFirst("^.*:", ""); String value = parser.getAttributeValue(i); if (attr.equals(ATTR_BEGIN)) { startTime = parseTimeExpression(value, DEFAULT_FRAMERATE, DEFAULT_SUBFRAMERATE, DEFAULT_TICKRATE); } else if (attr.equals(ATTR_END)) { endTime = parseTimeExpression(value, DEFAULT_FRAMERATE, DEFAULT_SUBFRAMERATE, DEFAULT_TICKRATE); } else if (attr.equals(ATTR_DURATION)) { duration = parseTimeExpression(value, DEFAULT_FRAMERATE, DEFAULT_SUBFRAMERATE, DEFAULT_TICKRATE); } else { // Do nothing. } } if (parent != null && parent.startTimeUs != TtmlNode.UNDEFINED_TIME) { if (startTime != TtmlNode.UNDEFINED_TIME) { startTime += parent.startTimeUs; } if (endTime != TtmlNode.UNDEFINED_TIME) { endTime += parent.startTimeUs; } } if (endTime == TtmlNode.UNDEFINED_TIME) { if (duration > 0) { // Infer the end time from the duration. endTime = startTime + duration; } else if (parent != null && parent.endTimeUs != TtmlNode.UNDEFINED_TIME) { // If the end time remains unspecified, then it should be inherited from the parent. endTime = parent.endTimeUs; } } return TtmlNode.buildNode(parser.getName(), startTime, endTime); } private static boolean isSupportedTag(String tag) { if (tag.equals(TtmlNode.TAG_TT) || tag.equals(TtmlNode.TAG_HEAD) || tag.equals(TtmlNode.TAG_BODY) || tag.equals(TtmlNode.TAG_DIV) || tag.equals(TtmlNode.TAG_P) || tag.equals(TtmlNode.TAG_SPAN) || tag.equals(TtmlNode.TAG_BR) || tag.equals(TtmlNode.TAG_STYLE) || tag.equals(TtmlNode.TAG_STYLING) || tag.equals(TtmlNode.TAG_LAYOUT) || tag.equals(TtmlNode.TAG_REGION) || tag.equals(TtmlNode.TAG_METADATA) || tag.equals(TtmlNode.TAG_SMPTE_IMAGE) || tag.equals(TtmlNode.TAG_SMPTE_DATA) || tag.equals(TtmlNode.TAG_SMPTE_INFORMATION)) { return true; } return false; } /** * Parses a time expression, returning the parsed timestamp. * <p> * For the format of a time expression, see: * <a href="http://www.w3.org/TR/ttaf1-dfxp/#timing-value-timeExpression">timeExpression</a> * * @param time A string that includes the time expression. * @param frameRate The framerate of the stream. * @param subframeRate The sub-framerate of the stream * @param tickRate The tick rate of the stream. * @return The parsed timestamp in microseconds. * @throws NumberFormatException If the given string does not contain a valid time expression. */ private static long parseTimeExpression(String time, int frameRate, int subframeRate, int tickRate) { Matcher matcher = CLOCK_TIME.matcher(time); if (matcher.matches()) { String hours = matcher.group(1); double durationSeconds = Long.parseLong(hours) * 3600; String minutes = matcher.group(2); durationSeconds += Long.parseLong(minutes) * 60; String seconds = matcher.group(3); durationSeconds += Long.parseLong(seconds); String fraction = matcher.group(4); durationSeconds += (fraction != null) ? Double.parseDouble(fraction) : 0; String frames = matcher.group(5); durationSeconds += (frames != null) ? ((double) Long.parseLong(frames)) / frameRate : 0; String subframes = matcher.group(6); durationSeconds += (subframes != null) ? ((double) Long.parseLong(subframes)) / subframeRate / frameRate : 0; return (long) (durationSeconds * 1000000); } matcher = OFFSET_TIME.matcher(time); if (matcher.matches()) { String timeValue = matcher.group(1); double value = Double.parseDouble(timeValue); String unit = matcher.group(2); if (unit.equals("h")) { value *= 3600L * 1000000L; } else if (unit.equals("m")) { value *= 60 * 1000000; } else if (unit.equals("s")) { value *= 1000000; } else if (unit.equals("ms")) { value *= 1000; } else if (unit.equals("f")) { value = value / frameRate * 1000000; } else if (unit.equals("t")) { value = value / tickRate * 1000000; } return (long) value; } throw new NumberFormatException("Malformed time expression: " + time); } }