/*
* (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
* Nuxeo
* Florent Guillaume
* Thierry Delprat
*/
package org.nuxeo.ecm.platform.convert.plugins;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.StandardConversionTask;
import org.artofsolving.jodconverter.document.DocumentFamily;
import org.artofsolving.jodconverter.document.DocumentFormat;
import org.nuxeo.common.Environment;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.ecm.core.convert.extension.ExternalConverter;
import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;
/**
* Converter based on JOD which uses an external OpenOffice process to do actual conversions.
*
* @deprecated Since 8.4. Use 'soffice' with {@link org.nuxeo.ecm.platform.convert.plugins.CommandLineConverter} instead
*/
@Deprecated
public class JODBasedConverter implements ExternalConverter {
protected static final String TMP_PATH_PARAMETER = "TmpDirectory";
private static final Log log = LogFactory.getLog(JODBasedConverter.class);
/**
* Boolean conversion parameter for PDF/A-1.
*
* @since 5.6
*/
public static final String PDFA1_PARAM = "PDF/A-1";
/**
* Boolean parameter to force update of the document TOC
*
* @since 5.6
*/
public static final String UPDATE_INDEX_PARAM = StandardConversionTask.UPDATE_DOCUMENT_INDEX;
protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<>();
{
PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
}
protected ConverterDescriptor descriptor;
protected String getDestinationMimeType() {
return descriptor.getDestinationMimeType();
}
/**
* Returns the destination format for the given plugin.
* <p>
* It takes the actual destination mimetype from the plugin configuration.
*
* @param sourceFormat the source format
* @param pdfa1 true if PDF/A-1 is required
*/
protected DocumentFormat getDestinationFormat(OfficeDocumentConverter documentConverter,
DocumentFormat sourceFormat, boolean pdfa1) {
String mimeType = getDestinationMimeType();
DocumentFormat destinationFormat = documentConverter.getFormatRegistry().getFormatByMediaType(mimeType);
if ("application/pdf".equals(mimeType)) {
destinationFormat = extendPDFFormat(sourceFormat, destinationFormat, pdfa1);
}
return destinationFormat;
}
protected DocumentFormat extendPDFFormat(DocumentFormat sourceFormat, DocumentFormat defaultFormat, boolean pdfa1) {
DocumentFamily sourceFamily = sourceFormat.getInputFamily();
String sourceMediaType = sourceFormat.getMediaType();
DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF", "pdf", "application/pdf");
Map<DocumentFamily, Map<String, ?>> storePropertiesByFamily = new HashMap<>();
Map<DocumentFamily, Map<String, ?>> defaultStorePropertiesByFamily = defaultFormat.getStorePropertiesByFamily();
for (DocumentFamily family : defaultStorePropertiesByFamily.keySet()) {
if (family.equals(sourceFamily)) {
continue;
}
storePropertiesByFamily.put(family, defaultStorePropertiesByFamily.get(family));
}
storePropertiesByFamily.put(sourceFamily,
extendPDFStoreProperties(sourceMediaType, pdfa1, defaultStorePropertiesByFamily.get(sourceFamily)));
pdfFormat.setStorePropertiesByFamily(storePropertiesByFamily);
return pdfFormat;
}
protected Map<String, Object> extendPDFStoreProperties(String mediatype, boolean pdfa1,
Map<String, ?> originalProperties) {
Map<String, Object> extendedProperties = new HashMap<>();
for (Map.Entry<String, ?> entry : originalProperties.entrySet()) {
extendedProperties.put(entry.getKey(), entry.getValue());
}
if ("text/html".equals(mediatype)) {
extendedProperties.put("FilterName", "writer_web_pdf_Export");
}
if (pdfa1) {
Map<String, Object> filterData = new HashMap<>();
filterData.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
filterData.put("UseTaggedPDF", Boolean.TRUE); // per spec
extendedProperties.put("FilterData", filterData);
}
return extendedProperties;
}
/**
* Returns the format for the file passed as a parameter.
* <p>
* We will ask the mimetype registry service to sniff its mimetype.
*
* @return DocumentFormat for the given file
*/
private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, File file) {
MimetypeRegistry mimetypeRegistry = Framework.getService(MimetypeRegistry.class);
String mimetypeStr = mimetypeRegistry.getMimetypeFromFile(file);
DocumentFormat format = documentConverter.getFormatRegistry().getFormatByMediaType(mimetypeStr);
return format;
}
/**
* Returns the DocumentFormat for the given mimetype.
*
* @return DocumentFormat for the given mimetype
*/
private static DocumentFormat getSourceFormat(OfficeDocumentConverter documentConverter, String mimetype) {
return documentConverter.getFormatRegistry().getFormatByMediaType(mimetype);
}
@Override
protected void finalize() throws Throwable {
super.finalize();
}
@Override
public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
blobHolder = new UTF8CharsetConverter().convert(blobHolder, parameters);
Blob inputBlob = blobHolder.getBlob();
String blobPath = blobHolder.getFilePath();
if (inputBlob == null) {
return null;
}
OfficeDocumentConverter documentConverter = newDocumentConverter();
// This plugin do deal only with one input source.
String sourceMimetype = inputBlob.getMimeType();
boolean pdfa1 = false;
if (parameters != null) {
Serializable pdfa1Val = parameters.get(PDFA1_PARAM);
if (pdfa1Val instanceof Boolean) {
pdfa1 = ((Boolean) pdfa1Val).booleanValue();
} else if (pdfa1Val instanceof String) {
pdfa1 = Boolean.parseBoolean((String) pdfa1Val);
}
}
File sourceFile = null;
File outFile = null;
File[] files = null;
try {
// If the input blob has the HTML mime type, make sure the
// charset meta is present, add it if not
if ("text/html".equals(sourceMimetype)) {
inputBlob = checkCharsetMeta(inputBlob);
}
// Get original file extension
String ext = inputBlob.getFilename();
int dotPosition = ext.lastIndexOf('.');
if (dotPosition == -1) {
ext = ".bin";
} else {
ext = ext.substring(dotPosition);
}
// Copy in a file to be able to read it several time
sourceFile = Framework.createTempFile("NXJOOoConverterDocumentIn", ext);
InputStream stream = inputBlob.getStream();
FileUtils.copyInputStreamToFile(stream, sourceFile);
stream.close();
DocumentFormat sourceFormat = null;
if (sourceMimetype != null) {
// Try to fetch it from the registry.
sourceFormat = getSourceFormat(documentConverter, sourceMimetype);
}
// If not found in the registry or not given as a parameter.
// Try to sniff ! What does that smell ? :)
if (sourceFormat == null) {
sourceFormat = getSourceFormat(documentConverter, sourceFile);
}
// From plugin settings because we know the destination
// mimetype.
DocumentFormat destinationFormat = getDestinationFormat(documentConverter, sourceFormat, pdfa1);
// allow HTML2PDF filtering
List<Blob> blobs = new ArrayList<>();
if (descriptor.getDestinationMimeType().equals("text/html")) {
String tmpDirPath = getTmpDirectory();
File myTmpDir = new File(tmpDirPath + "/JODConv_" + System.currentTimeMillis());
boolean created = myTmpDir.mkdir();
if (!created) {
throw new IOException("Unable to create temp dir");
}
outFile = new File(myTmpDir.getAbsolutePath() + "/" + "NXJOOoConverterDocumentOut."
+ destinationFormat.getExtension());
created = outFile.createNewFile();
if (!created) {
throw new IOException("Unable to create temp file");
}
log.debug("Input File = " + outFile.getAbsolutePath());
// Perform the actual conversion.
documentConverter.convert(sourceFile, outFile, destinationFormat);
files = myTmpDir.listFiles();
for (File file : files) {
// copy the files to a new tmp location, as we'll delete them
Blob blob;
try (FileInputStream in = new FileInputStream(file)) {
blob = Blobs.createBlob(in);
}
blob.setFilename(file.getName());
blobs.add(blob);
// add a blob for the index
if (file.getName().equals(outFile.getName())) {
Blob indexBlob;
try (FileInputStream in = new FileInputStream(file)) {
indexBlob = Blobs.createBlob(in);
}
indexBlob.setFilename("index.html");
blobs.add(0, indexBlob);
}
}
} else {
outFile = Framework.createTempFile("NXJOOoConverterDocumentOut", '.' + destinationFormat.getExtension());
// Perform the actual conversion.
documentConverter.convert(sourceFile, outFile, destinationFormat, parameters);
Blob blob;
try (FileInputStream in = new FileInputStream(outFile)) {
blob = Blobs.createBlob(in, getDestinationMimeType());
}
blobs.add(blob);
}
return new SimpleCachableBlobHolder(blobs);
} catch (IOException e) {
String msg = String.format("An error occurred trying to convert file %s to from %s to %s", blobPath,
sourceMimetype, getDestinationMimeType());
throw new ConversionException(msg, e);
} finally {
if (sourceFile != null) {
sourceFile.delete();
}
if (outFile != null) {
outFile.delete();
}
if (files != null) {
for (File file : files) {
if (file.exists()) {
file.delete();
}
}
}
}
}
protected OfficeDocumentConverter newDocumentConverter() throws ConversionException {
OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
OfficeDocumentConverter documentConverter = oooManagerService.getDocumentConverter();
if (documentConverter == null) {
throw new ConversionException("Could not connect to the remote OpenOffice server");
}
return documentConverter;
}
@SuppressWarnings("hiding")
@Override
public void init(ConverterDescriptor descriptor) {
this.descriptor = descriptor;
}
@Override
public ConverterCheckResult isConverterAvailable() {
ConverterCheckResult result = new ConverterCheckResult();
OOoManagerService oooManagerService = Framework.getService(OOoManagerService.class);
if (!oooManagerService.isOOoManagerStarted()) {
result.setAvailable(false);
}
return result;
}
protected String getTmpDirectory() {
String tmp = null;
Map<String, String> parameters = descriptor.getParameters();
if (parameters != null && parameters.containsKey(TMP_PATH_PARAMETER)) {
tmp = parameters.get(TMP_PATH_PARAMETER);
}
if (tmp == null) {
tmp = Environment.getDefault().getTemp().getPath();
}
return tmp;
}
/**
* Checks if the {@code inputBlob} string contains a {@code charset} meta tag. If not, add it.
*
* @param inputBlob the input blob
* @throws IOException Signals that an I/O exception has occurred.
*/
protected Blob checkCharsetMeta(Blob inputBlob) throws IOException {
String charset = inputBlob.getEncoding();
if (!StringUtils.isEmpty(charset)) {
Pattern charsetMetaPattern = Pattern.compile(String.format("content=\"text/html;\\s*charset=%s\"", charset));
Matcher charsetMetaMatcher = charsetMetaPattern.matcher(inputBlob.getString());
if (!charsetMetaMatcher.find()) {
String charsetMetaTag = String.format(
"<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">", charset);
StringBuilder sb = new StringBuilder(charsetMetaTag);
sb.append(new String(inputBlob.getByteArray(), charset));
Blob blobWithCharsetMetaTag = Blobs.createBlob(sb.toString(), "text/html", charset,
inputBlob.getFilename());
return blobWithCharsetMetaTag;
}
}
return inputBlob;
}
}