/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.wysiwyg.server.internal.plugin.importer;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Map;

import javax.inject.Inject;
import javax.inject.Singleton;

import org.apache.commons.lang3.StringUtils;
import org.artofsolving.jodconverter.document.DocumentFamily;
import org.artofsolving.jodconverter.document.DocumentFormat;
import org.slf4j.Logger;
import org.w3c.dom.Document;
import org.xwiki.bridge.DocumentAccessBridge;
import org.xwiki.component.annotation.Component;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.gwt.wysiwyg.client.plugin.importer.ImportService;
import org.xwiki.gwt.wysiwyg.client.wiki.Attachment;
import org.xwiki.model.reference.AttachmentReference;
import org.xwiki.model.reference.DocumentReference;
import org.xwiki.officeimporter.builder.PresentationBuilder;
import org.xwiki.officeimporter.builder.XDOMOfficeDocumentBuilder;
import org.xwiki.officeimporter.document.XDOMOfficeDocument;
import org.xwiki.officeimporter.server.OfficeServer;
import org.xwiki.wysiwyg.server.wiki.EntityReferenceConverter;
import org.xwiki.xml.html.HTMLCleaner;
import org.xwiki.xml.html.HTMLCleanerConfiguration;
import org.xwiki.xml.html.HTMLUtils;
/**
 * XWiki specific implementation of {@link ImportService}: cleans HTML pasted from office applications and imports
 * office files attached to wiki pages as annotated XHTML.
 *
 * @version $Id: e5574ef1134e24963348c7be527f1891b487829f $
 */
@Component
@Singleton
public class XWikiImportService implements ImportService
{
    /**
     * Logger.
     */
    @Inject
    private Logger logger;

    /**
     * The component manager. We need it because we have to access some components dynamically.
     */
    @Inject
    private ComponentManager componentManager;

    /**
     * The component used to access the content of the office attachments.
     */
    @Inject
    private DocumentAccessBridge documentAccessBridge;

    /**
     * The component used to convert office presentations to XDOM.
     */
    @Inject
    private PresentationBuilder presentationBuilder;

    /**
     * The component used to convert office text documents to XDOM.
     */
    @Inject
    private XDOMOfficeDocumentBuilder documentBuilder;

    /**
     * Used to access the document converter.
     */
    @Inject
    private OfficeServer officeServer;

    /**
     * The object used to convert between client and server entity reference.
     */
    @Inject
    private EntityReferenceConverter entityReferenceConverter;

    /**
     * Used to import an office file using the office macro. Created lazily, the first time an import requests the
     * office viewer.
     * <p>
     * NOTE(review): the lazy initialization is not synchronized; concurrent first calls may each create an instance.
     * Presumably harmless, to be confirmed.
     */
    private OfficeMacroImporter officeMacroImporter;

    /**
     * {@inheritDoc}
     * <p>
     * Looks up the {@link HTMLCleaner} registered with the given hint, cleans the pasted HTML with the supplied
     * parameters and returns the result without the surrounding {@code <html>} tags. Any failure is logged and
     * re-thrown as a {@link RuntimeException} carrying only the localized message of the original exception.
     */
    @Override
    public String cleanOfficeHTML(String htmlPaste, String cleanerHint, Map<String, String> cleaningParams)
    {
        try {
            HTMLCleaner cleaner = componentManager.getInstance(HTMLCleaner.class, cleanerHint);
            HTMLCleanerConfiguration configuration = cleaner.getDefaultConfiguration();
            configuration.setParameters(cleaningParams);

            // Wrap the paste content in a DIV element because the DIV element, unlike BODY for instance, accepts both
            // in-line and block content. This way we prevent the creation of a paragraph when in-line content is
            // pasted.
            StringReader input = new StringReader("<div>" + htmlPaste + "</div>");
            Document cleanedDocument = cleaner.clean(input, configuration);

            // Drop the DIV wrapper added above.
            HTMLUtils.stripFirstElementInside(cleanedDocument, "body", "div");
            HTMLUtils.stripHTMLEnvelope(cleanedDocument);

            // Remove the HTML wrapper and the new lines before/after it.
            String output = HTMLUtils.toString(cleanedDocument, true, true).trim();
            return StringUtils.removeEndIgnoreCase(StringUtils.removeStartIgnoreCase(output, "<html>"), "</html>");
        } catch (Exception e) {
            this.logger.error("Exception while cleaning office HTML content.", e);
            // NOTE(review): the cause is not chained, only its message is propagated (the full stack trace is logged
            // above). Presumably this keeps the exception simple for the client side; confirm before changing.
            throw new RuntimeException(e.getLocalizedMessage());
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Converts the client-side reference of the given attachment to a server-side reference and imports the
     * referenced office file. Any failure is logged and re-thrown as a {@link RuntimeException} carrying only the
     * localized message of the original exception.
     */
    @Override
    public String officeToXHTML(Attachment attachment, Map<String, String> cleaningParams)
    {
        org.xwiki.gwt.wysiwyg.client.wiki.AttachmentReference clientAttachmentReference =
            new org.xwiki.gwt.wysiwyg.client.wiki.AttachmentReference(attachment.getReference());
        try {
            return importAttachment(entityReferenceConverter.convert(clientAttachmentReference), cleaningParams);
        } catch (Exception e) {
            this.logger.error("Exception while importing office document [{}]",
                clientAttachmentReference.getFileName(), e);
            // NOTE(review): the cause is not chained, only its message is propagated (the full stack trace is logged
            // above). Presumably this keeps the exception simple for the client side; confirm before changing.
            throw new RuntimeException(e.getLocalizedMessage());
        }
    }

    /**
     * Imports an office document that was previously attached to a wiki page.
     *
     * @param attachmentReference specifies the office document to import
     * @param parameters import parameters; {@code filterStyles} controls whether styles are filtered when importing
     *            office text documents; {@code useOfficeViewer} controls whether the office viewer macro is used
     *            instead of converting the content of the office file to wiki syntax
     * @return the annotated XHTML text obtained from the specified office document
     * @throws Exception if importing the specified attachment fails
     */
    private String importAttachment(AttachmentReference attachmentReference, Map<String, String> parameters)
        throws Exception
    {
        // Styles are filtered only when the parameter is exactly "strict"; a missing parameter means no filtering.
        boolean filterStyles = "strict".equals(parameters.get("filterStyles"));

        // parseBoolean() handles a missing (null) parameter by returning false.
        if (Boolean.parseBoolean(parameters.get("useOfficeViewer"))) {
            if (officeMacroImporter == null) {
                officeMacroImporter = new OfficeMacroImporter(componentManager);
            }
            return officeMacroImporter.render(officeMacroImporter.buildXDOM(attachmentReference, filterStyles));
        } else {
            return convertAttachmentContent(attachmentReference, filterStyles);
        }
    }

    /**
     * Converts the content of the specified office file to wiki syntax. The images extracted from the office file are
     * attached to the document that holds the office file. The stream used to read the office file is always closed
     * before this method returns.
     *
     * @param attachmentReference specifies the office file whose content should be converted
     * @param filterStyles controls whether styles are filtered when converting the HTML produced by the office server
     *            to wiki syntax
     * @return the annotated XHTML text obtained from the specified office document
     * @throws Exception if converting the content of the specified attachment fails
     */
    private String convertAttachmentContent(AttachmentReference attachmentReference, boolean filterStyles)
        throws Exception
    {
        String officeFileName = attachmentReference.getName();
        DocumentReference targetDocRef = attachmentReference.getDocumentReference();

        InputStream officeFileStream = documentAccessBridge.getAttachmentContent(attachmentReference);
        try {
            XDOMOfficeDocument xdomOfficeDocument;
            if (isPresentation(officeFileName)) {
                xdomOfficeDocument = presentationBuilder.build(officeFileStream, officeFileName, targetDocRef);
            } else {
                xdomOfficeDocument =
                    documentBuilder.build(officeFileStream, officeFileName, targetDocRef, filterStyles);
            }

            // Attach the images extracted from the imported office document to the target wiki document.
            for (Map.Entry<String, byte[]> artifact : xdomOfficeDocument.getArtifacts().entrySet()) {
                AttachmentReference artifactReference = new AttachmentReference(artifact.getKey(), targetDocRef);
                documentAccessBridge.setAttachmentContent(artifactReference, artifact.getValue());
            }

            return xdomOfficeDocument.getContentAsString("annotatedxhtml/1.0");
        } finally {
            // Release the attachment stream whether the conversion succeeded or not. A failure to close must not hide
            // the outcome of the import, so it is only logged.
            if (officeFileStream != null) {
                try {
                    officeFileStream.close();
                } catch (IOException e) {
                    this.logger.debug("Failed to close the content stream of office attachment [{}]",
                        officeFileName, e);
                }
            }
        }
    }

    /**
     * Checks the extension of the given file name (the text after the last dot, or the whole name when there is no
     * dot) against the format registry of the office converter.
     *
     * @param fileName a file name
     * @return {@code true} if the specified file is an office presentation, {@code false} otherwise (including when
     *         the office converter is not available or the extension is not known to the format registry)
     */
    private boolean isPresentation(String fileName)
    {
        String extension = fileName.substring(fileName.lastIndexOf('.') + 1);
        if (officeServer.getConverter() != null) {
            DocumentFormat format = officeServer.getConverter().getFormatRegistry().getFormatByExtension(extension);
            return format != null && format.getInputFamily() == DocumentFamily.PRESENTATION;
        }
        return false;
    }
}