/* * Copyright (C) 2013 Robert Simonovsky * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package cz.cas.lib.proarc.common.export.mets; import java.io.File; import java.io.IOException; import java.math.BigInteger; import java.net.URL; import java.util.Calendar; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.bind.JAXBElement; import javax.xml.datatype.XMLGregorianCalendar; import javax.xml.namespace.QName; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.dom.DOMResult; import javax.xml.transform.dom.DOMSource; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathFactory; import org.apache.commons.io.FileUtils; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import cz.cas.lib.proarc.common.export.mets.structure.IMetsElement; import cz.cas.lib.proarc.common.fedora.DigitalObjectException; import cz.cas.lib.proarc.common.fedora.MixEditor; import cz.cas.lib.proarc.common.fedora.RemoteStorage.RemoteObject; import cz.cas.lib.proarc.mix.BasicDigitalObjectInformationType.Compression; import cz.cas.lib.proarc.mix.BasicDigitalObjectInformationType.ObjectIdentifier; import cz.cas.lib.proarc.mix.BasicImageInformationType; import cz.cas.lib.proarc.mix.BasicImageInformationType.BasicImageCharacteristics.PhotometricInterpretation; import cz.cas.lib.proarc.mix.ChangeHistoryType; import cz.cas.lib.proarc.mix.ChangeHistoryType.ImageProcessing; import cz.cas.lib.proarc.mix.BasicDigitalObjectInformationType; import cz.cas.lib.proarc.mix.ImageCaptureMetadataType; import cz.cas.lib.proarc.mix.Mix; import cz.cas.lib.proarc.mix.MixType; import cz.cas.lib.proarc.mix.MixUtils; import cz.cas.lib.proarc.mix.OrientationType; import cz.cas.lib.proarc.mix.StringType; import cz.cas.lib.proarc.mix.TypeOfDateType; import cz.cas.lib.proarc.mix.TypeOfOrientationType; import edu.harvard.hul.ois.jhove.App; import edu.harvard.hul.ois.jhove.JhoveBase; import edu.harvard.hul.ois.jhove.Module; import edu.harvard.hul.ois.jhove.OutputHandler; import java.util.UUID; /** * @author Robert Simonovsky * * Utility class for jHove application * */ public class JhoveUtility { private static final Logger LOG = Logger.getLogger(JhoveUtility.class.getName()); static final String JHOVE_CONFIG_NAME = "jhove.conf"; static { LOG.setLevel(Level.SEVERE); } public static Node getNodeRecursive(Node node, String localName) { if ((node.getLocalName() != null) && (node.getLocalName().startsWith(localName))) { return node; } else { NodeList nl = node.getChildNodes(); if (nl == null) { return null; } for (int a = 0; a < nl.getLength(); a++) { Node found = getNodeRecursive(nl.item(a), localName); if (found != null) { return found; } } } return null; } /** * * Inits the Jhove app * * @param metsInfo */ public static void initJhove(MetsContext metsContext) throws MetsExportException { if (metsContext.getJhoveContext() == null) { File configFolder = new File(metsContext.getOutputPath(), metsContext.getPackageID()); metsContext.setJhoveContext(createContext(configFolder)); } } /** * Creates the JHOVE context and stores its configuration in a default temp folder. * Use {@link JhoveContext#destroy() } to remove temp folder. * * @return the context * @throws MetsExportException failure */ public static JhoveContext createContext() throws MetsExportException { File temp = new File(FileUtils.getTempDirectory(), "jhove" + UUID.randomUUID().toString()); if (!temp.mkdir()) { throw new MetsExportException("Cannot create " + temp.toString()); } temp.deleteOnExit(); return createContext(temp); } /** * Creates the JHOVE context and stores its configuration in the passed folder. * <p>{@link JhoveContext#destroy() } will remove the configuration folder! * @param configFolder folder to store configuration files * @return the context * @throws MetsExportException failure * @see #destroyConfigFiles */ public static JhoveContext createContext(File configFolder) throws MetsExportException { Calendar calendar = Calendar.getInstance(); App app = new App(JhoveUtility.class.getSimpleName(), "1.0", new int[] { calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH), calendar.get(Calendar.DAY_OF_MONTH) }, "jHove", ""); try { JhoveBase jhoveBase = new JhoveBase(); File jhoveConfigFile = createJhoveConfigurationFile(configFolder); jhoveBase.init(jhoveConfigFile.getAbsolutePath(), null); return new JhoveContext(jhoveBase, configFolder, app); } catch (Exception ex) { throw new MetsExportException("Error while initialising jHove", false, ex); } } /** * Gets MIX of a source image file. * * @param sourceFile image file to describe with MIX * @param tempFolder workspace for JHove * @param deviceMix optional device description * @param dateCreated optional date of creation of the source * @param originalFileName optional image file name * @return the MIX description * @throws MetsExportException failure */ public static JHoveOutput getMix(File sourceFile, File tempFolder, MixType deviceMix, XMLGregorianCalendar dateCreated, String originalFileName ) throws MetsExportException { JhoveContext ctx = createContext(tempFolder); return getMix(sourceFile, ctx, deviceMix, dateCreated, null); } /** * * Returns the MIX data for fiven element * * @param targetFile * @param metsContext * @param deviceMix * @param dateCreated * @param originalFileName * @return * @throws MetsExportException */ public static JHoveOutput getMix(File targetFile, MetsContext metsContext, MixType deviceMix, XMLGregorianCalendar dateCreated, String originalFileName) throws MetsExportException { initJhove(metsContext); JhoveContext jhoveContext = metsContext.getJhoveContext(); return getMix(targetFile, jhoveContext, deviceMix, dateCreated, originalFileName); } /** * Returns the MIX information from the fedoraStream * * @param metsElement * @param streamName * @return * @throws MetsExportException */ public static JHoveOutput getMixFromFedora(IMetsElement metsElement, String streamName) throws MetsExportException { // Document document = null; // hotfix of issue 250 JHoveOutput jhoveOutput = new JHoveOutput(); MixEditor mixEditor; RemoteObject fObj = metsElement.getMetsContext().getRemoteStorage().find(metsElement.getOriginalPid()); if (MixEditor.RAW_ID.equals(streamName)) { mixEditor = MixEditor.raw(fObj); } else if (MixEditor.NDK_ARCHIVAL_ID.equals(streamName)) { mixEditor = MixEditor.ndkArchival(fObj); } else { return null; } Mix mix; try { mix = mixEditor.readMix(); if (mix == null) { return null; } } catch (DigitalObjectException ex) { throw new MetsExportException(metsElement.getOriginalPid(), ex.getMessage(), false, ex); } // if (FoxmlUtils.findDatastream(metsElement.getSourceObject(), streamName) != null) { // List<Element> streamContent = MetsUtils.getDataStreams(metsElement.getMetsContext().getFedoraClient(), metsElement.getOriginalPid(), streamName); // if (streamContent == null) { // return null; // } // document = MetsUtils.getDocumentFromList(streamContent); // } // if (document == null) { // return null; // } // DOMSource domSource = new DOMSource(document); // MixType mix = MixUtils.unmarshal(domSource, MixType.class); jhoveOutput.setMix(mix); jhoveOutput.setFormatVersion(mix.getBasicDigitalObjectInformation().getFormatDesignation().getFormatName().getValue()); return jhoveOutput; } /** * Merges the mix from the device and from the image * * @param source * @param deviceMix */ public static void mergeMix(Mix source, MixType deviceMix) { if (deviceMix != null) { if (deviceMix.getImageCaptureMetadata() != null) { DOMResult domResult = new DOMResult(); MixUtils.marshal(domResult, new JAXBElement<ImageCaptureMetadataType>(new QName("uri", "local"), ImageCaptureMetadataType.class, deviceMix.getImageCaptureMetadata()), true); ImageCaptureMetadataType imageCaptureMtd = MixUtils.unmarshal(new DOMSource(domResult.getNode()), ImageCaptureMetadataType.class); source.setImageCaptureMetadata(imageCaptureMtd); } } } /** * Inserts dateCreated into Mix * * @param mix * @param dateCreated */ public static void insertDateCreated(Mix mix, XMLGregorianCalendar dateCreated) { // inserts DateCreated if missing if ((mix.getImageCaptureMetadata() == null) || (mix.getImageCaptureMetadata().getGeneralCaptureInformation() == null) || (mix.getImageCaptureMetadata().getGeneralCaptureInformation().getDateTimeCreated() == null)) { TypeOfDateType dateTimeCreated = new TypeOfDateType(); dateTimeCreated.setValue(dateCreated.toXMLFormat()); if (mix.getImageCaptureMetadata() == null) { mix.setImageCaptureMetadata(new ImageCaptureMetadataType()); } if (mix.getImageCaptureMetadata().getGeneralCaptureInformation() == null) { mix.getImageCaptureMetadata().setGeneralCaptureInformation(new ImageCaptureMetadataType.GeneralCaptureInformation()); } mix.getImageCaptureMetadata().getGeneralCaptureInformation().setDateTimeCreated(dateTimeCreated); } } /** * inserts ObjectIdentifier into mix * * @param mix * @param pid * @param datastream */ public static void insertObjectIdentifier(Mix mix, String pid, String datastream) { mix.getBasicDigitalObjectInformation().getObjectIdentifier().clear(); ObjectIdentifier identifier = new ObjectIdentifier(); StringType stringTypeIdentifier = new StringType(); stringTypeIdentifier.setValue("ProArc_URI"); StringType stringTypeIdentifierValue = new StringType(); stringTypeIdentifierValue.setValue(Const.FEDORAPREFIX + pid + "/" + Const.dataStreamToModel.get(datastream)); identifier.setObjectIdentifierType(stringTypeIdentifier); identifier.setObjectIdentifierValue(stringTypeIdentifierValue); mix.getBasicDigitalObjectInformation().getObjectIdentifier().add(identifier); } /** * Inserts changeHistory into Mix * * @param mix * @param dateCreated * @param originalFileName */ public static void insertChangeHistory(Mix mix, XMLGregorianCalendar dateCreated, String originalFileName) { if (mix.getChangeHistory() == null) { mix.setChangeHistory(new ChangeHistoryType()); } if (mix.getChangeHistory().getImageProcessing().size() == 0) { ImageProcessing imageProcessing = new ChangeHistoryType.ImageProcessing(); TypeOfDateType dateTimeProcessed = new TypeOfDateType(); dateTimeProcessed.setValue(dateCreated.toXMLFormat()); imageProcessing.setDateTimeProcessed(dateTimeProcessed); StringType sourceData = new StringType(); sourceData.setValue(originalFileName); imageProcessing.setSourceData(sourceData); mix.getChangeHistory().getImageProcessing().add(imageProcessing); } } /** * Gets MIX of a source image file. * * @param sourceFile image file to describe with MIX * @param jhoveContext JHove * @param deviceMix optional device description * @param dateCreated optional date of creation of the source * @param originalFileName optional image file name * @return the MIX description * @throws MetsExportException failure */ public static JHoveOutput getMix(File sourceFile, JhoveContext jhoveContext, MixType deviceMix, XMLGregorianCalendar dateCreated, String originalFileName ) throws MetsExportException { JHoveOutput jhoveOutput = new JHoveOutput(); if (sourceFile == null || !sourceFile.isFile() || !sourceFile.exists()) { LOG.log(Level.SEVERE, "target file '" + sourceFile + "' cannot be found."); throw new MetsExportException("target file '" + sourceFile + "' cannot be found.", false, null); } try { JhoveBase jhoveBase = jhoveContext.getJhoveBase(); File outputFile = File.createTempFile("jhove", "output"); LOG.log(Level.FINE, "JHOVE output file " + outputFile); Module module = jhoveBase.getModule(null); OutputHandler aboutHandler = jhoveBase.getHandler(null); OutputHandler xmlHandler = jhoveBase.getHandler("XML"); LOG.log(Level.FINE, "Calling JHOVE dispatch(...) on file " + sourceFile); jhoveBase.dispatch(jhoveContext.getJhoveApp(), module, aboutHandler, xmlHandler, outputFile.getAbsolutePath(), new String[] { sourceFile.getAbsolutePath() }); DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); builderFactory.setNamespaceAware(true); DocumentBuilder builder = builderFactory.newDocumentBuilder(); Document jHoveDoc = builder.parse(outputFile); outputFile.delete(); Node node = getNodeRecursive(jHoveDoc, "mix"); if (node == null) { return jhoveOutput; } Mix mix = MixUtils.unmarshal(new DOMSource(node), Mix.class); XPath xpath = XPathFactory.newInstance().newXPath(); String formatVersion = xpath.compile("*[local-name()='jhove']/*[local-name()='repInfo']/*[local-name()='version']").evaluate(jHoveDoc); if ((formatVersion == null) || ("0".equals(formatVersion)) || (formatVersion.trim().length() == 0)) { formatVersion = "1.0"; } String formatName = xpath.compile("*[local-name()='jhove']/*[local-name()='repInfo']/*[local-name()='mimeType']").evaluate(jHoveDoc); if ((formatName == null) || (formatName.trim().length() == 0)) { formatName = "unknown"; } jhoveOutput.setFormatVersion(formatVersion); // merge device and jhove Mix mergeMix(mix, deviceMix); // insert date time created if ((dateCreated != null) && (mix != null)) { insertDateCreated(mix, dateCreated); } // insert ChangeHistory if ((dateCreated != null) && (originalFileName != null)) { insertChangeHistory(mix, dateCreated, originalFileName); } // add formatVersion if (mix != null) { if (mix.getBasicDigitalObjectInformation() == null) { mix.setBasicDigitalObjectInformation(new BasicDigitalObjectInformationType()); } if (mix.getBasicDigitalObjectInformation().getFormatDesignation() == null) { mix.getBasicDigitalObjectInformation().setFormatDesignation(new BasicDigitalObjectInformationType.FormatDesignation()); } StringType formatNameType = new StringType(); StringType formatVersionType = new StringType(); formatNameType.setValue(formatName); formatVersionType.setValue(formatVersion); mix.getBasicDigitalObjectInformation().getFormatDesignation().setFormatName(formatNameType); mix.getBasicDigitalObjectInformation().getFormatDesignation().setFormatVersion(formatVersionType); } // workarround for bug in Jhove - Unknown compression for jpeg2000 if ("image/jp2".equals(formatName)) { if (mix.getBasicDigitalObjectInformation() == null) { mix.setBasicDigitalObjectInformation(new BasicDigitalObjectInformationType()); } mix.getBasicDigitalObjectInformation().getCompression().clear(); Compression compression = new BasicDigitalObjectInformationType.Compression(); StringType jpeg2000Type = new StringType(); jpeg2000Type.setValue("JPEG 2000"); compression.setCompressionScheme(jpeg2000Type); mix.getBasicDigitalObjectInformation().getCompression().add(compression); } jhoveOutput.setMix(mix); } catch (Exception e) { throw new MetsExportException("Error inspecting file '" + sourceFile + "' - " + e.getMessage(), false, e); } return jhoveOutput; } /** * Copy the Jhove configuration file to a temporary file. * * @return the {@link File} where the Jhove configuration was saved. * */ private static File createJhoveConfigurationFile(File configFolder) throws MetsExportException { URL jhoveConf = JhoveUtility.class.getResource(JHOVE_CONFIG_NAME); URL jhoveConfXsd = JhoveUtility.class.getResource("jhoveConfig.xsd"); try { File jhoveConfFile = new File(configFolder, JHOVE_CONFIG_NAME); LOG.log(Level.FINE, "JHOVE configuration file " + jhoveConfFile); if (!jhoveConfFile.exists()) { FileUtils.copyURLToFile(jhoveConf, jhoveConfFile); } File xsdFile = new File(jhoveConfFile.getParent(), "jhoveConfig.xsd"); if (!xsdFile.exists()) { FileUtils.copyURLToFile(jhoveConfXsd, xsdFile); } return jhoveConfFile; } catch (IOException ex) { throw new MetsExportException("Unable to create jHove config file", false, ex); } } /** * Removes JHOVE configuration files (not folder) used by the context. * It is here not to break {@link cz.cas.lib.proarc.common.export.mets.structure.MetsElementVisitor}. * @param ctx context */ public static void destroyConfigFiles(JhoveContext ctx) { if (ctx == null) { return ; } File configDir = ctx.getConfigFolder(); File jhoveConfFile = new File(configDir, JHOVE_CONFIG_NAME); LOG.log(Level.FINE, "JHOVE configuration file " + jhoveConfFile); if (jhoveConfFile.exists()) { jhoveConfFile.delete(); } File xsdFile = new File(configDir, "jhoveConfig.xsd"); if (xsdFile.exists()) { xsdFile.delete(); } } /** * adds denominator value * * @param jhoveOutput */ public static void addDenominator(JHoveOutput jhoveOutput) { if ((jhoveOutput != null) && (jhoveOutput.getMix() != null)) { if ((jhoveOutput.getMix().getImageAssessmentMetadata() != null) && (jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics() != null)) { if ((jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics().getXSamplingFrequency() != null) && (jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics().getXSamplingFrequency().getDenominator() == null)) { jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics().getXSamplingFrequency().setDenominator(BigInteger.ONE); } if ((jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics().getXSamplingFrequency() != null) && (jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics().getYSamplingFrequency().getDenominator() == null)) { jhoveOutput.getMix().getImageAssessmentMetadata().getSpatialMetrics().getYSamplingFrequency().setDenominator(BigInteger.ONE); } } } } /** * Adds the photometric information to the mix * * * @param jhoveOutput * @param photometricInterpretation */ public static void addPhotometricInformation(JHoveOutput jhoveOutput, PhotometricInterpretation photometricInterpretation) { if (photometricInterpretation != null) { if (jhoveOutput.getMix().getBasicImageInformation() == null) { jhoveOutput.getMix().setBasicImageInformation(new BasicImageInformationType()); } if (jhoveOutput.getMix().getBasicImageInformation().getBasicImageCharacteristics() == null) { jhoveOutput.getMix().getBasicImageInformation().setBasicImageCharacteristics(new BasicImageInformationType.BasicImageCharacteristics()); } if (jhoveOutput.getMix().getBasicImageInformation().getBasicImageCharacteristics().getPhotometricInterpretation() == null) { DOMResult photometricResult = new DOMResult(); MixUtils.marshal(photometricResult, new JAXBElement<PhotometricInterpretation>(new QName("uri", "local"), PhotometricInterpretation.class, photometricInterpretation), true); PhotometricInterpretation photometricInterpretationNew = MixUtils.unmarshal(new DOMSource(photometricResult.getNode()), PhotometricInterpretation.class); jhoveOutput.getMix().getBasicImageInformation().getBasicImageCharacteristics().setPhotometricInterpretation(photometricInterpretationNew); } } } /** * Adds an orientation tag to the mix * * @param jhoveOutput */ public static void addOrientation(JHoveOutput jhoveOutput) { if ((jhoveOutput!=null)&&(jhoveOutput.getMix()!=null)) { if ((jhoveOutput.getMix().getImageCaptureMetadata() != null) && (jhoveOutput.getMix().getImageCaptureMetadata().getOrientation() == null)) { TypeOfOrientationType orientation = new TypeOfOrientationType(); orientation.setValue(OrientationType.UNKNOWN); jhoveOutput.getMix().getImageCaptureMetadata().setOrientation(orientation); } } } }