/**
*
*/
package org.docx4j.convert.out.html;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.docx4j.Docx4J;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.convert.out.HTMLSettings;
import org.docx4j.convert.out.common.AbstractVisitorExporterGenerator;
import org.docx4j.model.images.ConversionImageHandler;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
/**
*
* Running Xalan extension functions on Android is problematic:
*
* http://stackoverflow.com/questions/10579339/is-it-possible-to-call-a-java-extension-function-from-xalan-on-android
*
* so this uses TraversalUtil to generate HTML output
* without any need for Xalan or XSLT.
*
* We could use a simple JAXB model of HTML, but instead
* this class uses org.w3c.dom to construct the HTML document.
*
* This class might be neater if it used CompoundTraversalUtilVisitorCallback,
* but it would be less obvious what is going on.
*
* @author jharrop
* @deprecated
*/
public class HtmlExporterNonXSLT {

    /** Initial capacity, in bytes, of the in-memory buffer that receives the HTML in {@link #export()}. */
    protected static final int DEFAULT_OUTPUT_SIZE = 102400;

    private static Logger log = LoggerFactory.getLogger(HtmlExporterNonXSLT.class);

    /** Path of the docx file loaded by {@link #main(String[])}. */
    protected static String inputfilepath;

    /** Not referenced anywhere within this class. */
    protected static String outputfilepath;

    /** Settings (package and image handler) populated by the constructor and used by every export. */
    protected HTMLSettings htmlSettings = null;

    /**
     * Creates an exporter bound to one package.
     *
     * @param wordMLPackage the package to convert; stored in the HTML settings
     * @param conversionImageHandler the image handler to store in the HTML settings
     */
    public HtmlExporterNonXSLT(WordprocessingMLPackage wordMLPackage,
            ConversionImageHandler conversionImageHandler) {
        this.htmlSettings = new HTMLSettings();
        this.htmlSettings.setWmlPackage(wordMLPackage);
        this.htmlSettings.setImageHandler(conversionImageHandler);
    }

    /**
     * Converts the package to HTML via {@link Docx4J#toHTML}, asking for the
     * non-XSL exporter, and parses the serialized result back into a DOM.
     *
     * @return the parsed HTML document, or {@code null} if the export or the
     *         parse failed (the failure is logged, not rethrown)
     */
    public org.w3c.dom.Document export() {
        ByteArrayOutputStream htmlBytes = new ByteArrayOutputStream(DEFAULT_OUTPUT_SIZE);
        try {
            Docx4J.toHTML(htmlSettings, htmlBytes, Docx4J.FLAG_EXPORT_PREFER_NONXSL);
            ByteArrayInputStream serialized = new ByteArrayInputStream(htmlBytes.toByteArray());
            return XmlUtils.getNewDocumentBuilder().parse(serialized);
        } catch (Docx4JException e) {
            log.error("Exception exporting document: " + e.getMessage(), e);
        } catch (SAXException e) {
            log.error("Exception parsing document: " + e.getMessage(), e);
        } catch (IOException e) {
            log.error("Exception parsing document: " + e.getMessage(), e);
        }
        // Reached only when one of the handlers above ran.
        return null;
    }

    /**
     * Builds the CSS for the style tree of the package's main document part.
     *
     * @return the CSS text appended by {@code HtmlCssHelper.createCssForStyles}
     */
    public String getCss() {
        StringBuilder css = new StringBuilder();
        WordprocessingMLPackage pkg = (WordprocessingMLPackage) htmlSettings.getWmlPackage();
        HtmlCssHelper.createCssForStyles(pkg, pkg.getMainDocumentPart().getStyleTree(), css);
        return css.toString();
    }

    /**
     * Generates HTML for the given content only, wrapped in a {@code div}.
     * <p>
     * Do not rely on this method: the conversion expects a structure that was
     * preprocessed from the complete document. Content for which it will
     * probably fail with a NullPointerException includes:
     * <ul>
     * <li>images</li>
     * <li>fields</li>
     * <li>bookmarks</li>
     * <li>links</li>
     * </ul>
     *
     * @param blockLevelContent the content to traverse
     * @param cssClass value for the wrapper's {@code class} attribute; omitted when {@code null}
     * @param cssId value for the wrapper's {@code id} attribute; omitted when {@code null}
     * @return a new document whose root is the wrapper {@code div}
     */
    public org.w3c.dom.Document export(Object blockLevelContent, String cssClass, String cssId) {
        HTMLConversionContext context = new HTMLConversionContext(htmlSettings, null, null);

        Document htmlDoc = XmlUtils.neww3cDomDocument();
        Element wrapper = htmlDoc.createElement("div");
        if (cssClass != null) {
            wrapper.setAttribute("class", cssClass);
        }
        if (cssId != null) {
            wrapper.setAttribute("id", cssId);
        }
        htmlDoc.appendChild(wrapper);

        AbstractVisitorExporterGenerator<HTMLConversionContext> visitor =
                HTMLExporterVisitorGenerator.GENERATOR_FACTORY.createInstance(context, htmlDoc, wrapper);
        // The traversal is started by constructing the TraversalUtil; the instance itself is not needed.
        new TraversalUtil(blockLevelContent, visitor);
        return htmlDoc;
    }

    /**
     * Ad-hoc driver: loads {@code hlink.docx} from the working directory,
     * exports it and logs the resulting HTML.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        // Other inputs that have been tried here: /hr.docx,
        // /sample-docs/word/sample-docx.docx, /sample-docs/word/2003/word2003-vml.docx,
        // /table-nested.docx
        inputfilepath = System.getProperty("user.dir") + "/hlink.docx";

        java.io.File source = new java.io.File(inputfilepath);
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(source);

        ConversionImageHandler images = new HTMLConversionImageHandler("c:\\temp", "/bar", true);
        HtmlExporterNonXSLT exporter = new HtmlExporterNonXSLT(wordMLPackage, images);

        Document html = exporter.export();
        log.info(XmlUtils.w3CDomNodeToString(html));

        // Wondering where <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
        // comes from? See http://stackoverflow.com/questions/1409091/how-do-i-prevent-the-java-xml-transformer-using-html-method-from-adding-meta
    }
}