/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.android.exoplayer.text.ttml;
import com.google.android.exoplayer.C;
import com.google.android.exoplayer.ParserException;
import com.google.android.exoplayer.text.Subtitle;
import com.google.android.exoplayer.text.SubtitleParser;
import com.google.android.exoplayer.util.MimeTypes;
import com.google.android.exoplayer.util.ParserUtil;
import com.google.android.exoplayer.util.Util;
import android.graphics.Color;
import android.text.Layout;
import android.util.Log;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A simple TTML parser that supports DFXP presentation profile.
* <p>
* Supported features in this parser are:
* <ul>
* <li>content
* <li>core
* <li>presentation
* <li>profile
* <li>structure
* <li>time-offset
* <li>timing
* <li>tickRate
* <li>time-clock-with-frames
* <li>time-clock
* <li>time-offset-with-frames
* <li>time-offset-with-ticks
* </ul>
* </p>
* @see <a href="http://www.w3.org/TR/ttaf1-dfxp/">TTML specification</a>
*/
public final class TtmlParser implements SubtitleParser {
private static final String TAG = "TtmlParser";
private static final String ATTR_BEGIN = "begin";
private static final String ATTR_DURATION = "dur";
private static final String ATTR_END = "end";
private static final String ATTR_STYLE = "style";
private static final Pattern CLOCK_TIME =
Pattern.compile("^([0-9][0-9]+):([0-9][0-9]):([0-9][0-9])"
+ "(?:(\\.[0-9]+)|:([0-9][0-9])(?:\\.([0-9]+))?)?$");
private static final Pattern OFFSET_TIME =
Pattern.compile("^([0-9]+(?:\\.[0-9]+)?)(h|m|s|ms|f|t)$");
// TODO: read and apply the following attributes if specified.
private static final int DEFAULT_FRAMERATE = 30;
private static final int DEFAULT_SUBFRAMERATE = 1;
private static final int DEFAULT_TICKRATE = 1;
private final XmlPullParserFactory xmlParserFactory;
private final boolean strictParsing;
/**
* Equivalent to {@code TtmlParser(false)}.
*/
public TtmlParser() {
this(false);
}
/**
* @param strictParsing If true, {@link #parse(InputStream)} will throw a {@link ParserException}
* if the stream contains invalid data. If false, the parser will make a best effort to ignore
* minor errors in the stream. Note however that a {@link ParserException} will still be
* thrown when this is not possible.
*/
public TtmlParser(boolean strictParsing) {
this.strictParsing = strictParsing;
try {
xmlParserFactory = XmlPullParserFactory.newInstance();
} catch (XmlPullParserException e) {
throw new RuntimeException("Couldn't create XmlPullParserFactory instance", e);
}
}
@Override
public Subtitle parse(InputStream inputStream) throws IOException {
try {
XmlPullParser xmlParser = xmlParserFactory.newPullParser();
Map<String, TtmlStyle> globalStyles = new HashMap<>();
xmlParser.setInput(inputStream, null);
TtmlSubtitle ttmlSubtitle = null;
LinkedList<TtmlNode> nodeStack = new LinkedList<>();
int unsupportedNodeDepth = 0;
int eventType = xmlParser.getEventType();
while (eventType != XmlPullParser.END_DOCUMENT) {
TtmlNode parent = nodeStack.peekLast();
if (unsupportedNodeDepth == 0) {
String name = xmlParser.getName();
if (eventType == XmlPullParser.START_TAG) {
if (!isSupportedTag(name)) {
Log.i(TAG, "Ignoring unsupported tag: " + xmlParser.getName());
unsupportedNodeDepth++;
} else if (TtmlNode.TAG_HEAD.equals(name)) {
parseHeader(xmlParser, globalStyles);
} else {
try {
TtmlNode node = parseNode(xmlParser, parent, globalStyles);
nodeStack.addLast(node);
if (parent != null) {
parent.addChild(node);
}
} catch (ParserException e) {
if (strictParsing) {
throw e;
} else {
Log.w(TAG, "Suppressing parser error", e);
// Treat the node (and by extension, all of its children) as unsupported.
unsupportedNodeDepth++;
}
}
}
} else if (eventType == XmlPullParser.TEXT) {
parent.addChild(TtmlNode.buildTextNode(xmlParser.getText(), parent.style));
} else if (eventType == XmlPullParser.END_TAG) {
if (xmlParser.getName().equals(TtmlNode.TAG_TT)) {
ttmlSubtitle = new TtmlSubtitle(nodeStack.getLast());
}
nodeStack.removeLast();
}
} else {
if (eventType == XmlPullParser.START_TAG) {
unsupportedNodeDepth++;
} else if (eventType == XmlPullParser.END_TAG) {
unsupportedNodeDepth--;
}
}
xmlParser.next();
eventType = xmlParser.getEventType();
}
return ttmlSubtitle;
} catch (XmlPullParserException xppe) {
throw new ParserException("Unable to parse source", xppe);
}
}
private Map<String, TtmlStyle> parseHeader(XmlPullParser xmlParser,
Map<String, TtmlStyle> globalStyles)
throws IOException, XmlPullParserException {
do {
xmlParser.next();
if (ParserUtil.isStartTag(xmlParser, TtmlNode.TAG_STYLE)) {
String parentStyleId = xmlParser.getAttributeValue(null, ATTR_STYLE);
TtmlStyle style = parseStyleAttributes(xmlParser, new TtmlStyle());
if (parentStyleId != null) {
String[] ids = parentStyleId.split(" ");
for (int i = 0; i < ids.length; i++) {
style.chain(globalStyles.get(ids[i]));
}
}
if (style.getId() != null) {
globalStyles.put(style.getId(), style);
}
}
} while (!ParserUtil.isEndTag(xmlParser, TtmlNode.TAG_HEAD));
return globalStyles;
}
private TtmlStyle parseStyleAttributes(XmlPullParser parser, TtmlStyle style) {
int attributeCount = parser.getAttributeCount();
for (int i = 0; i < attributeCount; i++) {
String attributeName = parser.getAttributeName(i);
String attributeValue = parser.getAttributeValue(i);
switch (ParserUtil.removeNamespacePrefix(attributeName)) {
case TtmlNode.ATTR_ID:
if (TtmlNode.TAG_STYLE.equals(parser.getName())) {
style = createIfNull(style).setId(attributeValue);
}
break;
case TtmlNode.ATTR_TTS_BACKGROUND_COLOR:
style = createIfNull(style);
try {
style.setBackgroundColor(Color.parseColor(attributeValue));
} catch (IllegalArgumentException e) {
Log.w(TAG, "failed parsing background value: '" + attributeValue + "'");
}
break;
case TtmlNode.ATTR_TTS_COLOR:
style = createIfNull(style);
try {
style.setColor(Color.parseColor(attributeValue));
} catch (IllegalArgumentException e) {
Log.w(TAG, "failed parsing color value: '" + attributeValue + "'");
}
break;
case TtmlNode.ATTR_TTS_FONT_FAMILY:
style = createIfNull(style).setFontFamily(attributeValue);
break;
case TtmlNode.ATTR_TTS_FONT_SIZE:
// TODO: handle size
break;
case TtmlNode.ATTR_TTS_FONT_WEIGHT:
style = createIfNull(style).setBold(
TtmlNode.BOLD.equalsIgnoreCase(attributeValue));
break;
case TtmlNode.ATTR_TTS_FONT_STYLE:
style = createIfNull(style).setItalic(
TtmlNode.ITALIC.equalsIgnoreCase(attributeValue));
break;
case TtmlNode.ATTR_TTS_TEXT_ALIGN:
switch (Util.toLowerInvariant(attributeValue)) {
case TtmlNode.LEFT:
style = createIfNull(style).setTextAlign(Layout.Alignment.ALIGN_NORMAL);
break;
case TtmlNode.START:
style = createIfNull(style).setTextAlign(Layout.Alignment.ALIGN_NORMAL);
break;
case TtmlNode.RIGHT:
style = createIfNull(style).setTextAlign(Layout.Alignment.ALIGN_OPPOSITE);
break;
case TtmlNode.END:
style = createIfNull(style).setTextAlign(Layout.Alignment.ALIGN_OPPOSITE);
break;
case TtmlNode.CENTER:
style = createIfNull(style).setTextAlign(Layout.Alignment.ALIGN_CENTER);
break;
}
break;
case TtmlNode.ATTR_TTS_TEXT_DECORATION:
switch (Util.toLowerInvariant(attributeValue)) {
case TtmlNode.LINETHROUGH:
style = createIfNull(style).setLinethrough(true);
break;
case TtmlNode.NO_LINETHROUGH:
style = createIfNull(style).setLinethrough(false);
break;
case TtmlNode.UNDERLINE:
style = createIfNull(style).setUnderline(true);
break;
case TtmlNode.NO_UNDERLINE:
style = createIfNull(style).setUnderline(false);
break;
}
break;
default:
// ignore
break;
}
}
return style;
}
private TtmlStyle createIfNull(TtmlStyle style) {
return style == null ? new TtmlStyle() : style;
}
@Override
public boolean canParse(String mimeType) {
return MimeTypes.APPLICATION_TTML.equals(mimeType);
}
private TtmlNode parseNode(XmlPullParser parser, TtmlNode parent,
Map<String, TtmlStyle> globalStyles) throws ParserException {
long duration = 0;
long startTime = TtmlNode.UNDEFINED_TIME;
long endTime = TtmlNode.UNDEFINED_TIME;
int attributeCount = parser.getAttributeCount();
TtmlStyle style = parseStyleAttributes(parser, null);
boolean hasInlineStyles = style != null;
if (parent != null && parent.style != null) {
if (hasInlineStyles) {
style.inherit(parent.style);
} else {
style = parent.style.getInheritableStyle();
}
}
for (int i = 0; i < attributeCount; i++) {
// TODO: check if it is safe to remove the namespace prefix
String attr = ParserUtil.removeNamespacePrefix(parser.getAttributeName(i));
String value = parser.getAttributeValue(i);
if (attr.equals(ATTR_BEGIN)) {
startTime = parseTimeExpression(value,
DEFAULT_FRAMERATE, DEFAULT_SUBFRAMERATE, DEFAULT_TICKRATE);
} else if (attr.equals(ATTR_END)) {
endTime = parseTimeExpression(value,
DEFAULT_FRAMERATE, DEFAULT_SUBFRAMERATE, DEFAULT_TICKRATE);
} else if (attr.equals(ATTR_DURATION)) {
duration = parseTimeExpression(value,
DEFAULT_FRAMERATE, DEFAULT_SUBFRAMERATE, DEFAULT_TICKRATE);
} else if (attr.equals(ATTR_STYLE)) {
// IDREFS: potentially multiple space delimited ids
String[] ids = value.split(" ");
if (style == null) {
// use global style without overriding
if (ids.length == 1) {
style = globalStyles.get(value);
} else if (ids.length > 1){
style = new TtmlStyle();
for (int j = 0; j < ids.length; j++) {
style.chain(globalStyles.get(ids[j]));
}
}
} else if (hasInlineStyles) {
// local attributes inherits from global style
for (int j = 0; j < ids.length; j++) {
style.chain(globalStyles.get(ids[j]));
}
} else if (ids.length > 1 || (ids.length == 1 && style != globalStyles.get(ids[0]))) {
// merge global style and parent styles
TtmlStyle inheritedStyles = style;
style = new TtmlStyle();
for (int j = 0; j < ids.length; j++) {
style.chain(globalStyles.get(ids[j]));
}
style.inherit(inheritedStyles);
}
} else {
// Do nothing.
}
}
if (parent != null && parent.startTimeUs != TtmlNode.UNDEFINED_TIME) {
if (startTime != TtmlNode.UNDEFINED_TIME) {
startTime += parent.startTimeUs;
}
if (endTime != TtmlNode.UNDEFINED_TIME) {
endTime += parent.startTimeUs;
}
}
if (endTime == TtmlNode.UNDEFINED_TIME) {
if (duration > 0) {
// Infer the end time from the duration.
endTime = startTime + duration;
} else if (parent != null && parent.endTimeUs != TtmlNode.UNDEFINED_TIME) {
// If the end time remains unspecified, then it should be inherited from the parent.
endTime = parent.endTimeUs;
}
}
return TtmlNode.buildNode(parser.getName(), startTime, endTime, style);
}
private static boolean isSupportedTag(String tag) {
if (tag.equals(TtmlNode.TAG_TT)
|| tag.equals(TtmlNode.TAG_HEAD)
|| tag.equals(TtmlNode.TAG_BODY)
|| tag.equals(TtmlNode.TAG_DIV)
|| tag.equals(TtmlNode.TAG_P)
|| tag.equals(TtmlNode.TAG_SPAN)
|| tag.equals(TtmlNode.TAG_BR)
|| tag.equals(TtmlNode.TAG_STYLE)
|| tag.equals(TtmlNode.TAG_STYLING)
|| tag.equals(TtmlNode.TAG_LAYOUT)
|| tag.equals(TtmlNode.TAG_REGION)
|| tag.equals(TtmlNode.TAG_METADATA)
|| tag.equals(TtmlNode.TAG_SMPTE_IMAGE)
|| tag.equals(TtmlNode.TAG_SMPTE_DATA)
|| tag.equals(TtmlNode.TAG_SMPTE_INFORMATION)) {
return true;
}
return false;
}
/**
* Parses a time expression, returning the parsed timestamp.
* <p>
* For the format of a time expression, see:
* <a href="http://www.w3.org/TR/ttaf1-dfxp/#timing-value-timeExpression">timeExpression</a>
*
* @param time A string that includes the time expression.
* @param frameRate The frame rate of the stream.
* @param subframeRate The sub-frame rate of the stream
* @param tickRate The tick rate of the stream.
* @return The parsed timestamp in microseconds.
* @throws ParserException If the given string does not contain a valid time expression.
*/
private static long parseTimeExpression(String time, int frameRate, int subframeRate,
int tickRate) throws ParserException {
Matcher matcher = CLOCK_TIME.matcher(time);
if (matcher.matches()) {
String hours = matcher.group(1);
double durationSeconds = Long.parseLong(hours) * 3600;
String minutes = matcher.group(2);
durationSeconds += Long.parseLong(minutes) * 60;
String seconds = matcher.group(3);
durationSeconds += Long.parseLong(seconds);
String fraction = matcher.group(4);
durationSeconds += (fraction != null) ? Double.parseDouble(fraction) : 0;
String frames = matcher.group(5);
durationSeconds += (frames != null) ? ((double) Long.parseLong(frames)) / frameRate : 0;
String subframes = matcher.group(6);
durationSeconds += (subframes != null) ?
((double) Long.parseLong(subframes)) / subframeRate / frameRate : 0;
return (long) (durationSeconds * C.MICROS_PER_SECOND);
}
matcher = OFFSET_TIME.matcher(time);
if (matcher.matches()) {
String timeValue = matcher.group(1);
double offsetSeconds = Double.parseDouble(timeValue);
String unit = matcher.group(2);
if (unit.equals("h")) {
offsetSeconds *= 3600;
} else if (unit.equals("m")) {
offsetSeconds *= 60;
} else if (unit.equals("s")) {
// Do nothing.
} else if (unit.equals("ms")) {
offsetSeconds /= 1000;
} else if (unit.equals("f")) {
offsetSeconds /= frameRate;
} else if (unit.equals("t")) {
offsetSeconds /= tickRate;
}
return (long) (offsetSeconds * C.MICROS_PER_SECOND);
}
throw new ParserException("Malformed time expression: " + time);
}
}