/** * Licensed to The Apereo Foundation under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * * The Apereo Foundation licenses this file to you under the Educational * Community License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License * at: * * http://opensource.org/licenses/ecl2.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * */ package org.opencastproject.inspection.ffmpeg; import static org.opencastproject.inspection.api.MediaInspectionOptions.OPTION_ACCURATE_FRAME_COUNT; import static org.opencastproject.util.data.Collections.map; import org.opencastproject.inspection.api.MediaInspectionException; import org.opencastproject.inspection.ffmpeg.api.AudioStreamMetadata; import org.opencastproject.inspection.ffmpeg.api.MediaAnalyzer; import org.opencastproject.inspection.ffmpeg.api.MediaAnalyzerException; import org.opencastproject.inspection.ffmpeg.api.MediaContainerMetadata; import org.opencastproject.inspection.ffmpeg.api.VideoStreamMetadata; import org.opencastproject.mediapackage.MediaPackageElement; import org.opencastproject.mediapackage.MediaPackageElementBuilder; import org.opencastproject.mediapackage.MediaPackageElementBuilderFactory; import org.opencastproject.mediapackage.MediaPackageElementFlavor; import org.opencastproject.mediapackage.Stream; import org.opencastproject.mediapackage.Track; import org.opencastproject.mediapackage.UnsupportedElementException; import org.opencastproject.mediapackage.track.AudioStreamImpl; import org.opencastproject.mediapackage.track.TrackImpl; import org.opencastproject.mediapackage.track.VideoStreamImpl; import org.opencastproject.util.Checksum; import org.opencastproject.util.ChecksumType; import org.opencastproject.util.IoSupport; import org.opencastproject.util.MimeType; import org.opencastproject.util.MimeTypes; import org.opencastproject.util.NotFoundException; import org.opencastproject.util.UnknownFileTypeException; import org.opencastproject.util.data.Tuple; import org.opencastproject.workspace.api.Workspace; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.BooleanUtils; import org.apache.tika.metadata.HttpHeaders; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.util.Dictionary; import java.util.Hashtable; import java.util.List; import java.util.Map; import java.util.Map.Entry; /** * Contains the business logic for media inspection. Its primary purpose is to decouple the inspection logic from all * OSGi/MH job management boilerplate. */ public class MediaInspector { private static final Logger logger = LoggerFactory.getLogger(MediaInspector.class); private final Workspace workspace; /** The Apache Tika parser */ private final Parser tikaParser; private final String ffprobePath; public MediaInspector(Workspace workspace, Parser tikaParser, String ffprobePath) { this.workspace = workspace; this.tikaParser = tikaParser; this.ffprobePath = ffprobePath; } /** * Inspects the element that is passed in as uri. * * @param trackURI * the element uri * @return the inspected track * @throws org.opencastproject.inspection.api.MediaInspectionException * if inspection fails */ public Track inspectTrack(URI trackURI, Map<String, String> options) throws MediaInspectionException { logger.debug("inspect(" + trackURI + ") called, using workspace " + workspace); throwExceptionIfInvalid(options); try { // Get the file from the URL (runtime exception if invalid) File file = null; try { file = workspace.get(trackURI); } catch (NotFoundException notFound) { throw new MediaInspectionException("Unable to find resource " + trackURI, notFound); } catch (IOException ioe) { throw new MediaInspectionException("Error reading " + trackURI + " from workspace", ioe); } // Make sure the file has an extension. Otherwise, tools like ffmpeg will not work. // TODO: Try to guess the extension from the container's metadata if ("".equals(FilenameUtils.getExtension(file.getName()))) { throw new MediaInspectionException("Can not inspect files without a filename extension"); } MediaContainerMetadata metadata = getFileMetadata(file, getAccurateFrameCount(options)); if (metadata == null) { throw new MediaInspectionException("Media analyzer returned no metadata from " + file); } else { MediaPackageElementBuilder elementBuilder = MediaPackageElementBuilderFactory.newInstance().newElementBuilder(); TrackImpl track; MediaPackageElement element; try { element = elementBuilder.elementFromURI(trackURI, MediaPackageElement.Type.Track, null); } catch (UnsupportedElementException e) { throw new MediaInspectionException("Unable to create track element from " + file, e); } track = (TrackImpl) element; // Duration if (metadata.getDuration() != null && metadata.getDuration() > 0) track.setDuration(metadata.getDuration()); // Checksum try { track.setChecksum(Checksum.create(ChecksumType.DEFAULT_TYPE, file)); } catch (IOException e) { throw new MediaInspectionException("Unable to read " + file, e); } // Mimetype InputStream is = null; try { // Try to get the Mimetype from Apache Tika is = new FileInputStream(file); MimeType mimeType = extractContentType(is); // If Mimetype could not be extracted try to get it from opencast if (mimeType == null) { mimeType = MimeTypes.fromURL(file.toURI().toURL()); // The mimetype library doesn't know about audio/video metadata, so the type might be wrong. if ("audio".equals(mimeType.getType()) && metadata.hasVideoStreamMetadata()) { mimeType = MimeTypes.parseMimeType("video/" + mimeType.getSubtype()); } else if ("video".equals(mimeType.getType()) && !metadata.hasVideoStreamMetadata()) { mimeType = MimeTypes.parseMimeType("audio/" + mimeType.getSubtype()); } } track.setMimeType(mimeType); } catch (Exception e) { logger.error("Unable to find mimetype for {}", file.getAbsolutePath()); } finally { IoSupport.closeQuietly(is); } // Audio metadata try { addAudioStreamMetadata(track, metadata); } catch (Exception e) { throw new MediaInspectionException("Unable to extract audio metadata from " + file, e); } // Videometadata try { addVideoStreamMetadata(track, metadata); } catch (Exception e) { throw new MediaInspectionException("Unable to extract video metadata from " + file, e); } return track; } } catch (Exception e) { logger.warn("Error inspecting " + trackURI, e); if (e instanceof MediaInspectionException) { throw (MediaInspectionException) e; } else { throw new MediaInspectionException(e); } } } /** * Enriches the given element's mediapackage. * * @param element * the element to enrich * @param override * <code>true</code> to override existing metadata * @return the enriched element * @throws MediaInspectionException * if enriching fails */ public MediaPackageElement enrich(MediaPackageElement element, boolean override, final Map<String, String> options) throws MediaInspectionException { throwExceptionIfInvalid(options); if (element instanceof Track) { final Track originalTrack = (Track) element; return enrichTrack(originalTrack, override, options); } else { return enrichElement(element, override, options); } } /** * Enriches the track's metadata and can be executed in an asynchronous way. * * @param originalTrack * the original track * @param override * <code>true</code> to override existing metadata * @return the media package element * @throws MediaInspectionException */ private MediaPackageElement enrichTrack(final Track originalTrack, final boolean override, final Map<String, String> options) throws MediaInspectionException { try { URI originalTrackUrl = originalTrack.getURI(); MediaPackageElementFlavor flavor = originalTrack.getFlavor(); logger.debug("enrich(" + originalTrackUrl + ") called"); // Get the file from the URL File file = null; try { file = workspace.get(originalTrackUrl); } catch (NotFoundException e) { throw new MediaInspectionException("File " + originalTrackUrl + " was not found and can therefore not be " + "inspected", e); } catch (IOException e) { throw new MediaInspectionException("Error accessing " + originalTrackUrl, e); } // Make sure the file has an extension. Otherwise, tools like ffmpeg will not work. // TODO: Try to guess the extension from the container's metadata if ("".equals(FilenameUtils.getExtension(file.getName()))) { throw new MediaInspectionException("Can not inspect files without a filename extension"); } MediaContainerMetadata metadata = getFileMetadata(file, getAccurateFrameCount(options)); if (metadata == null) { throw new MediaInspectionException("Unable to acquire media metadata for " + originalTrackUrl); } else { TrackImpl track = null; try { track = (TrackImpl) MediaPackageElementBuilderFactory.newInstance().newElementBuilder() .elementFromURI(originalTrackUrl, MediaPackageElement.Type.Track, flavor); } catch (UnsupportedElementException e) { throw new MediaInspectionException("Unable to create track element from " + file, e); } // init the new track with old track.setChecksum(originalTrack.getChecksum()); track.setDuration(originalTrack.getDuration()); track.setElementDescription(originalTrack.getElementDescription()); track.setFlavor(flavor); track.setIdentifier(originalTrack.getIdentifier()); track.setMimeType(originalTrack.getMimeType()); track.setReference(originalTrack.getReference()); track.setSize(file.length()); track.setURI(originalTrackUrl); for (String tag : originalTrack.getTags()) { track.addTag(tag); } // enrich the new track with basic info if (track.getDuration() == null || override) track.setDuration(metadata.getDuration()); if (track.getChecksum() == null || override) { try { track.setChecksum(Checksum.create(ChecksumType.DEFAULT_TYPE, file)); } catch (IOException e) { throw new MediaInspectionException("Unable to read " + file, e); } } // Add the mime type if it's not already present if (track.getMimeType() == null || override) { try { MimeType mimeType = MimeTypes.fromURI(track.getURI()); // The mimetype library doesn't know about audio/video metadata, so the type might be wrong. if ("audio".equals(mimeType.getType()) && metadata.hasVideoStreamMetadata()) { mimeType = MimeTypes.parseMimeType("video/" + mimeType.getSubtype()); } else if ("video".equals(mimeType.getType()) && !metadata.hasVideoStreamMetadata()) { mimeType = MimeTypes.parseMimeType("audio/" + mimeType.getSubtype()); } track.setMimeType(mimeType); } catch (UnknownFileTypeException e) { logger.info("Unable to detect the mimetype for track {} at {}", track.getIdentifier(), track.getURI()); } } // find all streams Dictionary<String, Stream> streamsId2Stream = new Hashtable<String, Stream>(); for (Stream stream : originalTrack.getStreams()) { streamsId2Stream.put(stream.getIdentifier(), stream); } // audio list try { addAudioStreamMetadata(track, metadata); } catch (Exception e) { throw new MediaInspectionException("Unable to extract audio metadata from " + file, e); } // video list try { addVideoStreamMetadata(track, metadata); } catch (Exception e) { throw new MediaInspectionException("Unable to extract video metadata from " + file, e); } logger.info("Successfully inspected track {}", track); return track; } } catch (Exception e) { logger.warn("Error enriching track " + originalTrack, e); if (e instanceof MediaInspectionException) { throw (MediaInspectionException) e; } else { throw new MediaInspectionException(e); } } } /** * Enriches the media package element metadata such as the mime type, the file size etc. The method mutates the * argument element. * * @param element * the media package element * @param override * <code>true</code> to overwrite existing metadata * @return the enriched element * @throws MediaInspectionException * if enriching fails */ private MediaPackageElement enrichElement(final MediaPackageElement element, final boolean override, final Map<String, String> options) throws MediaInspectionException { try { File file; try { file = workspace.get(element.getURI()); } catch (NotFoundException e) { throw new MediaInspectionException("Unable to find " + element.getURI() + " in the workspace", e); } catch (IOException e) { throw new MediaInspectionException("Error accessing " + element.getURI() + " in the workspace", e); } // Checksum if (element.getChecksum() == null || override) { try { element.setChecksum(Checksum.create(ChecksumType.DEFAULT_TYPE, file)); } catch (IOException e) { throw new MediaInspectionException("Error generating checksum for " + element.getURI(), e); } } // Mimetype if (element.getMimeType() == null || override) { try { element.setMimeType(MimeTypes.fromURI(file.toURI())); } catch (UnknownFileTypeException e) { logger.info("unable to determine the mime type for {}", file.getName()); } } logger.info("Successfully inspected element {}", element); return element; } catch (Exception e) { logger.warn("Error enriching element " + element, e); if (e instanceof MediaInspectionException) { throw (MediaInspectionException) e; } else { throw new MediaInspectionException(e); } } } /** * Asks the media analyzer to extract the file's metadata. * * @param file * the file * @return the file container metadata * @throws MediaInspectionException * if metadata extraction fails */ private MediaContainerMetadata getFileMetadata(File file, boolean accurateFrameCount) throws MediaInspectionException { if (file == null) throw new IllegalArgumentException("file to analyze cannot be null"); try { MediaAnalyzer analyzer = new FFmpegAnalyzer(accurateFrameCount); analyzer.setConfig(map(Tuple.<String, Object> tuple(FFmpegAnalyzer.FFPROBE_BINARY_CONFIG, ffprobePath))); return analyzer.analyze(file); } catch (MediaAnalyzerException e) { throw new MediaInspectionException(e); } } /** * Adds the video related metadata to the track. * * @param track * the track * @param metadata * the container metadata * @throws Exception * Media analysis is fragile, and may throw any kind of runtime exceptions due to inconsistencies in the * media's metadata */ private Track addVideoStreamMetadata(TrackImpl track, MediaContainerMetadata metadata) throws Exception { List<VideoStreamMetadata> videoList = metadata.getVideoStreamMetadata(); if (videoList != null && !videoList.isEmpty()) { for (int i = 0; i < videoList.size(); i++) { VideoStreamImpl video = new VideoStreamImpl("video-" + (i + 1)); VideoStreamMetadata v = videoList.get(i); video.setBitRate(v.getBitRate()); video.setFormat(v.getFormat()); video.setFormatVersion(v.getFormatVersion()); video.setFrameCount(v.getFrames()); video.setFrameHeight(v.getFrameHeight()); video.setFrameRate(v.getFrameRate()); video.setFrameWidth(v.getFrameWidth()); video.setScanOrder(v.getScanOrder()); video.setScanType(v.getScanType()); // TODO: retain the original video metadata track.addStream(video); } } return track; } /** * Adds the audio related metadata to the track. * * @param track * the track * @param metadata * the container metadata * @throws Exception * Media analysis is fragile, and may throw any kind of runtime exceptions due to inconsistencies in the * media's metadata */ private Track addAudioStreamMetadata(TrackImpl track, MediaContainerMetadata metadata) throws Exception { List<AudioStreamMetadata> audioList = metadata.getAudioStreamMetadata(); if (audioList != null && !audioList.isEmpty()) { for (int i = 0; i < audioList.size(); i++) { AudioStreamImpl audio = new AudioStreamImpl("audio-" + (i + 1)); AudioStreamMetadata a = audioList.get(i); audio.setBitRate(a.getBitRate()); audio.setChannels(a.getChannels()); audio.setFormat(a.getFormat()); audio.setFormatVersion(a.getFormatVersion()); audio.setFrameCount(a.getFrames()); audio.setBitDepth(a.getResolution()); audio.setSamplingRate(a.getSamplingRate()); // TODO: retain the original audio metadata track.addStream(audio); } } return track; } /** * Determines the content type of an input stream. This method reads part of the stream, so it is typically best to * close the stream immediately after calling this method. * * @param in * the input stream * @return the content type */ private MimeType extractContentType(InputStream in) { try { // Find the content type, based on the stream content BodyContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); ParseContext context = new ParseContext(); tikaParser.parse(in, contenthandler, metadata, context); String mimeType = metadata.get(HttpHeaders.CONTENT_TYPE); if (mimeType == null) return null; return MimeTypes.parseMimeType(mimeType); } catch (Exception e) { logger.warn("Unable to extract mimetype from input stream, ", e); return null; } } /* Return true if OPTION_ACCURATE_FRAME_COUNT is set to true, false otherwise */ private boolean getAccurateFrameCount(final Map<String, String> options) { return BooleanUtils.toBoolean(options.get(OPTION_ACCURATE_FRAME_COUNT)); } /* Throws an exception if an unsupported option is set */ private void throwExceptionIfInvalid(final Map<String, String> options) throws MediaInspectionException { if (options != null) { for (Entry e : options.entrySet()) { if (e.getKey().equals(OPTION_ACCURATE_FRAME_COUNT)) { // This option is supported } else { throw new MediaInspectionException("Unsupported option " + e.getKey()); } } } else { throw new MediaInspectionException("Options must not be null"); } } }