/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tika.parser.video; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import static java.nio.charset.StandardCharsets.UTF_8; /** * <p> * Parser for metadata contained in Flash Videos (.flv). Resources: * http://osflash.org/flv and for AMF: * http://download.macromedia.com/pub/labs/amf/amf0_spec_121207.pdf * <p> * This parser is capable of extracting the general metadata from header as well * as embedded metadata. * <p> * Known keys for metadata (from file header): * <ol> * <li>hasVideo: true|false * <li>hasSound: true|false * </ol> * <p> * In addition to the above values also metadata that is inserted in to the * actual stream will be picked. Usually there are keys like: * hasKeyframes, lastkeyframetimestamp, audiocodecid, keyframes, filepositions, * hasMetadata, audiosamplerate, videodatarate metadatadate, videocodecid, * metadatacreator, audiosize, hasVideo, height, audiosamplesize, framerate, * hasCuePoints width, cuePoints, lasttimestamp, canSeekToEnd, datasize, * duration, videosize, filesize, audiodatarate, hasAudio, stereo audiodelay */ public class FLVParser extends AbstractParser { /** Serial version UID */ private static final long serialVersionUID = -8718013155719197679L; private static int TYPE_METADATA = 0x12; private static byte MASK_AUDIO = 1; private static byte MASK_VIDEO = 4; private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.video("x-flv")); public Set<MediaType> getSupportedTypes(ParseContext context) { return SUPPORTED_TYPES; } private long readUInt32(DataInputStream input) throws IOException { return input.readInt() & 0xFFFFFFFFL; } private int readUInt24(DataInputStream input) throws IOException { int uint = input.read()<<16; uint += input.read()<<8; uint += input.read(); return uint; } private Object readAMFData(DataInputStream input, int type) throws IOException { if (type == -1) { type = input.readUnsignedByte(); } switch (type) { case 0: return input.readDouble(); case 1: return input.readUnsignedByte() == 1; case 2: return readAMFString(input); case 3: return readAMFObject(input); case 8: return readAMFEcmaArray(input); case 10: return readAMFStrictArray(input); case 11: final Date date = new Date((long) input.readDouble()); input.readShort(); // time zone return date; case 13: return "UNDEFINED"; default: return null; } } private Object readAMFStrictArray(DataInputStream input) throws IOException { long count = readUInt32(input); ArrayList<Object> list = new ArrayList<Object>(); for (int i = 0; i < count; i++) { list.add(readAMFData(input, -1)); } return list; } private String readAMFString(DataInputStream input) throws IOException { int size = input.readUnsignedShort(); byte[] chars = new byte[size]; input.readFully(chars); return new String(chars, UTF_8); } private Object readAMFObject(DataInputStream input) throws IOException { HashMap<String, Object> array = new HashMap<String, Object>(); while (true) { String key = readAMFString(input); int dataType = input.read(); if (dataType == 9) { // object end marker break; } array.put(key, readAMFData(input, dataType)); } return array; } private Object readAMFEcmaArray(DataInputStream input) throws IOException { long size = readUInt32(input); HashMap<String, Object> array = new HashMap<String, Object>(); for (int i = 0; i < size; i++) { String key = readAMFString(input); int dataType = input.read(); array.put(key, readAMFData(input, dataType)); } return array; } private boolean checkSignature(DataInputStream fis) throws IOException { return fis.read() == 'F' && fis.read() == 'L' && fis.read() == 'V'; } public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { DataInputStream datainput = new DataInputStream(stream); if (!checkSignature(datainput)) { throw new TikaException("FLV signature not detected"); } // header int version = datainput.readUnsignedByte(); if (version != 1) { // should be 1, perhaps this is not flv? throw new TikaException("Unpexpected FLV version: " + version); } int typeFlags = datainput.readUnsignedByte(); long len = readUInt32(datainput); if (len != 9) { // we only know about format with header of 9 bytes throw new TikaException("Unpexpected FLV header length: " + len); } long sizePrev = readUInt32(datainput); if (sizePrev != 0) { // should be 0, perhaps this is not flv? throw new TikaException( "Unpexpected FLV first previous block size: " + sizePrev); } metadata.set(Metadata.CONTENT_TYPE, "video/x-flv"); metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0)); metadata.set("hasAudio", Boolean.toString((typeFlags & MASK_AUDIO) != 0)); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); // flv tag stream follows... while (true) { int type = datainput.read(); if (type == -1) { // EOF break; } int datalen = readUInt24(datainput); //body length readUInt32(datainput); // timestamp readUInt24(datainput); // streamid if (type == TYPE_METADATA) { // found metadata Tag, read content to buffer byte[] metaBytes = new byte[datalen]; for (int readCount = 0; readCount < datalen;) { int r = stream.read(metaBytes, readCount, datalen - readCount); if(r!=-1) { readCount += r; } else { break; } } ByteArrayInputStream is = new ByteArrayInputStream(metaBytes); DataInputStream dis = new DataInputStream(is); Object data = null; for (int i = 0; i < 2; i++) { data = readAMFData(dis, -1); } if (data instanceof Map) { // TODO if there are multiple metadata values with same key (in // separate AMF blocks, we currently loose previous values) Map<String, Object> extractedMetadata = (Map<String, Object>) data; for (Entry<String, Object> entry : extractedMetadata.entrySet()) { if (entry.getValue() == null) { continue; } metadata.set(entry.getKey(), entry.getValue().toString()); } } } else { // Tag was not metadata, skip over data we cannot handle for (int i = 0; i < datalen; i++) { datainput.readByte(); } } sizePrev = readUInt32(datainput); // previous block size if (sizePrev != datalen + 11) { // file was corrupt or we could not parse it... break; } } xhtml.endDocument(); } }