/** * Copyright (c) 2000-present Liferay, Inc. All rights reserved. * * This library is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. */ package com.liferay.portal.metadata; import com.liferay.portal.fabric.InputResource; import com.liferay.portal.kernel.exception.SystemException; import com.liferay.portal.kernel.io.DummyWriter; import com.liferay.portal.kernel.log.Log; import com.liferay.portal.kernel.log.LogFactoryUtil; import com.liferay.portal.kernel.process.ClassPathUtil; import com.liferay.portal.kernel.process.ProcessCallable; import com.liferay.portal.kernel.process.ProcessChannel; import com.liferay.portal.kernel.process.ProcessException; import com.liferay.portal.kernel.process.ProcessExecutorUtil; import com.liferay.portal.kernel.util.ArrayUtil; import com.liferay.portal.kernel.util.FileUtil; import com.liferay.portal.util.PropsValues; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.concurrent.Future; import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.poi.EncryptedDocumentException; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.XMPDM; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.WriteOutContentHandler; import org.xml.sax.ContentHandler; /** * @author Miguel Pastor * @author Alexander Chow * @author Shuyang Zhou */ public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor { public void setParser(Parser parser) { _parser = parser; } protected static Metadata extractMetadata( File file, Metadata metadata, Parser parser) throws IOException { if (metadata == null) { metadata = new Metadata(); } ParseContext parserContext = new ParseContext(); parserContext.set(Parser.class, parser); ContentHandler contentHandler = new WriteOutContentHandler( new DummyWriter()); try (InputStream inputStream = new FileInputStream(file)) { parser.parse(inputStream, contentHandler, metadata, parserContext); } catch (Exception e) { Throwable throwable = ExceptionUtils.getRootCause(e); if (throwable instanceof EncryptedDocumentException || throwable instanceof UnsupportedZipFeatureException) { if (_log.isWarnEnabled()) { _log.warn( "Unable to extract metadata from an encrypted file"); } } else if (e instanceof TikaException) { if (_log.isWarnEnabled()) { _log.warn("Unable to extract metadata"); } } else { _log.error(e, e); } throw new IOException(e); } // Remove potential security risks metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName()); metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName()); return metadata; } @Override protected Metadata extractMetadata( String extension, String mimeType, File file) { Metadata metadata = super.extractMetadata(extension, mimeType, file); boolean forkProcess = false; if (PropsValues.TEXT_EXTRACTION_FORK_PROCESS_ENABLED) { if (ArrayUtil.contains( PropsValues.TEXT_EXTRACTION_FORK_PROCESS_MIME_TYPES, mimeType)) { forkProcess = true; } } if (forkProcess) { ExtractMetadataProcessCallable extractMetadataProcessCallable = new ExtractMetadataProcessCallable(file, metadata, _parser); try { ProcessChannel<Metadata> processChannel = ProcessExecutorUtil.execute( ClassPathUtil.getPortalProcessConfig(), extractMetadataProcessCallable); Future<Metadata> future = processChannel.getProcessNoticeableFuture(); return future.get(); } catch (Exception e) { throw new SystemException(e); } } try { return extractMetadata(file, metadata, _parser); } catch (IOException ioe) { throw new SystemException(ioe); } } @Override protected Metadata extractMetadata( String extension, String mimeType, InputStream inputStream) { File file = FileUtil.createTempFile(); try { FileUtil.write(file, inputStream); return extractMetadata(extension, mimeType, file); } catch (Exception e) { throw new SystemException(e); } finally { file.delete(); } } private static final Log _log = LogFactoryUtil.getLog( TikaRawMetadataProcessor.class); private Parser _parser; private static class ExtractMetadataProcessCallable implements ProcessCallable<Metadata> { public ExtractMetadataProcessCallable( File file, Metadata metadata, Parser parser) { _file = file; _metadata = metadata; _parser = parser; } @Override public Metadata call() throws ProcessException { try { return extractMetadata(_file, _metadata, _parser); } catch (IOException ioe) { throw new ProcessException(ioe); } } private static final long serialVersionUID = 1L; @InputResource private final File _file; private final Metadata _metadata; private final Parser _parser; } }