/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE file at the root of the source
* tree and available online at
*
* https://github.com/keeps/roda
*/
package org.roda.core.plugins.plugins.characterization;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.io.IOUtils;
import org.roda.core.data.common.RodaConstants;
import org.roda.core.data.common.RodaConstants.PreservationEventType;
import org.roda.core.data.exceptions.AlreadyExistsException;
import org.roda.core.data.exceptions.AuthorizationDeniedException;
import org.roda.core.data.exceptions.GenericException;
import org.roda.core.data.exceptions.NotFoundException;
import org.roda.core.data.exceptions.RODAException;
import org.roda.core.data.exceptions.RequestNotValidException;
import org.roda.core.data.v2.LiteOptionalWithCause;
import org.roda.core.data.v2.ip.AIP;
import org.roda.core.data.v2.ip.AIPState;
import org.roda.core.data.v2.ip.Representation;
import org.roda.core.data.v2.ip.StoragePath;
import org.roda.core.data.v2.ip.metadata.LinkingIdentifier;
import org.roda.core.data.v2.jobs.Job;
import org.roda.core.data.v2.jobs.PluginType;
import org.roda.core.data.v2.jobs.Report;
import org.roda.core.data.v2.jobs.Report.PluginState;
import org.roda.core.data.v2.validation.ValidationException;
import org.roda.core.data.v2.validation.ValidationIssue;
import org.roda.core.data.v2.validation.ValidationReport;
import org.roda.core.index.IndexService;
import org.roda.core.model.ModelService;
import org.roda.core.model.utils.ModelUtils;
import org.roda.core.plugins.AbstractPlugin;
import org.roda.core.plugins.Plugin;
import org.roda.core.plugins.PluginException;
import org.roda.core.plugins.RODAObjectProcessingLogic;
import org.roda.core.plugins.orchestrate.SimpleJobPluginInfo;
import org.roda.core.plugins.plugins.PluginHelper;
import org.roda.core.storage.ContentPayload;
import org.roda.core.storage.DirectResourceAccess;
import org.roda.core.storage.StorageService;
import org.roda.core.storage.fs.FSPathContentPayload;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class MediaInfoPlugin extends AbstractPlugin<AIP> {
private static final Logger LOGGER = LoggerFactory.getLogger(MediaInfoPlugin.class);
@Override
public void init() throws PluginException {
// do nothing
}
@Override
public void shutdown() {
// do nothing
}
@Override
public String getName() {
return "AIP feature extraction (MediaInfo)";
}
@Override
public String getDescription() {
return "MediaInfo extracts technical and tag data for video and audio files.\nMediaInfo supports popular video formats (e.g. AVI, WMV, "
+ "QuickTime, Real, DivX, XviD) as well as lesser known or emerging formats such as MKV including WebM.\nMediaInfo reveals information "
+ "such as: Title, author, director, album, track number, date, duration, codec, aspect ratio, framerate, bitrate, Audio codec, sample "
+ "rate, channels, language, bitrate, subtitle language, etc.\nThe task updates PREMIS objects metadata in the Archival Information "
+ "Package (AIP) to store the results of the characterization process. A PREMIS event is also recorded after the task is run.";
}
@Override
public String getVersionImpl() {
return "1.0";
}
@Override
public Report execute(IndexService index, ModelService model, StorageService storage,
List<LiteOptionalWithCause> liteList) throws PluginException {
return PluginHelper.processObjects(this, new RODAObjectProcessingLogic<AIP>() {
@Override
public void process(IndexService index, ModelService model, StorageService storage, Report report, Job cachedJob,
SimpleJobPluginInfo jobPluginInfo, Plugin<AIP> plugin, AIP object) {
processAIP(index, model, storage, report, jobPluginInfo, cachedJob, object);
}
}, index, model, storage, liteList);
}
private void processAIP(IndexService index, ModelService model, StorageService storage, Report report,
SimpleJobPluginInfo jobPluginInfo, Job job, AIP aip) {
LOGGER.debug("Processing AIP {}", aip.getId());
boolean inotify = false;
Report reportItem = PluginHelper.initPluginReportItem(this, aip.getId(), AIP.class, AIPState.INGEST_PROCESSING);
PluginHelper.updatePartialJobReport(this, model, reportItem, false, job);
PluginState reportState = PluginState.SUCCESS;
ValidationReport validationReport = new ValidationReport();
List<LinkingIdentifier> sources = new ArrayList<>();
for (Representation representation : aip.getRepresentations()) {
LOGGER.debug("Processing representation {} from AIP {}", representation.getId(), aip.getId());
DirectResourceAccess directAccess = null;
try {
StoragePath representationDataPath = ModelUtils.getRepresentationDataStoragePath(aip.getId(),
representation.getId());
directAccess = storage.getDirectAccess(representationDataPath);
String mediaInfoOutput = MediaInfoPluginUtils.runMediaInfoOnPath(directAccess.getPath());
Map<String, Path> mediaInfoParsed = parseMediaInfoOutput(mediaInfoOutput);
for (Map.Entry<String, Path> entry : mediaInfoParsed.entrySet()) {
// XXX directories are not supported
List<String> directoryPath = new ArrayList<>();
String fileId = entry.getKey();
ContentPayload payload = new FSPathContentPayload(entry.getValue());
LOGGER.debug("Creating other metadata (AIP: {}, REPRESENTATION: {}, FILE: {})", aip.getId(),
representation.getId(), entry.getValue().toFile().getName());
model.createOrUpdateOtherMetadata(aip.getId(), representation.getId(), directoryPath, fileId, ".xml",
RodaConstants.OTHER_METADATA_TYPE_MEDIAINFO, payload, inotify);
sources.add(PluginHelper.getLinkingIdentifier(aip.getId(), representation.getId(), directoryPath, fileId,
RodaConstants.PRESERVATION_LINKING_OBJECT_SOURCE));
}
} catch (RODAException | IOException | XPathExpressionException | ParserConfigurationException | SAXException
| TransformerFactoryConfigurationError | TransformerException e) {
LOGGER.error("Error processing AIP {}: {}", aip.getId(), e.getMessage());
reportState = PluginState.FAILURE;
validationReport.addIssue(new ValidationIssue(e.getMessage()));
} finally {
IOUtils.closeQuietly(directAccess);
}
}
try {
model.notifyAipUpdated(aip.getId());
} catch (RequestNotValidException | GenericException | NotFoundException | AuthorizationDeniedException e) {
LOGGER.error("Error notifying of AIP update", e);
}
if (reportState.equals(PluginState.SUCCESS)) {
jobPluginInfo.incrementObjectsProcessedWithSuccess();
reportItem.setPluginState(PluginState.SUCCESS);
} else {
jobPluginInfo.incrementObjectsProcessedWithFailure();
reportItem.setHtmlPluginDetails(true).setPluginState(PluginState.FAILURE);
reportItem.setPluginDetails(validationReport.toHtml(false, false, false, "Error list"));
}
try {
PluginHelper.createPluginEvent(this, aip.getId(), model, index, sources, new ArrayList<LinkingIdentifier>(),
reportItem.getPluginState(), "", true);
} catch (ValidationException | RequestNotValidException | NotFoundException | GenericException
| AuthorizationDeniedException | AlreadyExistsException e) {
LOGGER.error("Error creating event: {}", e.getMessage(), e);
}
report.addReport(reportItem);
PluginHelper.updatePartialJobReport(this, model, reportItem, true, job);
}
private Map<String, Path> parseMediaInfoOutput(String mediaInfoOutput) throws ParserConfigurationException,
SAXException, IOException, TransformerFactoryConfigurationError, TransformerException, XPathExpressionException {
Map<String, Path> parsed = new HashMap<>();
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(mediaInfoOutput));
Document doc = db.parse(is);
NodeList nodes = doc.getElementsByTagName("File");
for (int i = 0; i < nodes.getLength(); i++) {
Node node = nodes.item(i);
Path nodeResult = Files.createTempFile("mediaInfo", ".xml");
try (FileWriter fw = new FileWriter(nodeResult.toFile())) {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(fw));
String fileName = extractFileName(nodeResult);
String[] tokens = fileName.split("/");
fileName = tokens[tokens.length - 1];
parsed.put(fileName, nodeResult);
}
}
return parsed;
}
private String extractFileName(Path nodeResult) throws ParserConfigurationException, IOException, SAXException {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(Files.newBufferedReader(nodeResult));
Document doc = db.parse(is);
NodeList nodes = doc.getElementsByTagName("Complete_name");
return nodes.item(0).getTextContent();
}
@Override
public Plugin<AIP> cloneMe() {
return new MediaInfoPlugin();
}
@Override
public PluginType getType() {
return PluginType.AIP_TO_AIP;
}
@Override
public boolean areParameterValuesValid() {
return true;
}
@Override
public PreservationEventType getPreservationEventType() {
return PreservationEventType.METADATA_EXTRACTION;
}
@Override
public String getPreservationEventDescription() {
return "Extracted metadata using MediaInfo";
}
@Override
public String getPreservationEventSuccessMessage() {
return "Extracted metadata using MediaInfo successfully";
}
@Override
public String getPreservationEventFailureMessage() {
return "Extracted metadata using MediaInfo with failures";
}
@Override
public Report beforeAllExecute(IndexService index, ModelService model, StorageService storage)
throws PluginException {
// do nothing
return null;
}
@Override
public Report afterAllExecute(IndexService index, ModelService model, StorageService storage) throws PluginException {
// do nothing
return null;
}
@Override
public List<String> getCategories() {
return Arrays.asList(RodaConstants.PLUGIN_CATEGORY_FEATURE_EXTRACTION,
RodaConstants.PLUGIN_CATEGORY_CHARACTERIZATION);
}
@Override
public List<Class<AIP>> getObjectClasses() {
return Arrays.asList(AIP.class);
}
}