/**
* Copyright (c) 2010 DITA for Publishers. Licensed under Apache License 2.
* See license files for details.
*/
package net.sourceforge.dita4publishers.word2dita;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import net.sourceforge.dita4publishers.api.bos.BosMemberValidationException;
import net.sourceforge.dita4publishers.impl.bos.BosConstructionOptions;
import net.sourceforge.dita4publishers.util.DataUtil;
import net.sourceforge.dita4publishers.util.DomException;
import net.sourceforge.dita4publishers.util.DomUtil;
import net.sourceforge.dita4publishers.util.SaxUtil;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* Helper class to validate the XML generated from Word and push
* validation messages back into the Word document.
*/
public class Word2DitaValidationHelper {
public static final String wNs = DocxConstants.nsByPrefix.get("w");
public static SimpleDateFormat timestampFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH':'mm':'ssZ");
public static final Log log = LogFactory.getLog(Word2DitaValidationHelper.class);
/**
* @param zipComponents
* @param logDoc
* @param documentDom
* @param commentsDom
* @param commentTemplate
* @throws XPathExpressionException
*/
static void addMessagesToDocxXml(Document logDoc, Document documentDom,
Document commentsDom, Element commentTemplate)
throws XPathExpressionException {
NodeList messagesNl = logDoc.getDocumentElement().getElementsByTagName("message");
for (int i = 0; i < messagesNl.getLength(); i++) {
Element message = (Element)messagesNl.item(i);
NodeList existingComments = commentsDom.getDocumentElement().getElementsByTagNameNS(wNs, "comment");
String commentId = String.valueOf(existingComments.getLength());
String messageText = message.getTextContent();
addCommentToComments(commentsDom, commentTemplate, messageText,
commentId);
String xpath = message.getAttribute("wordParaXPath");
// System.err.println("xpath=" + xpath);
if (xpath == null || "".equals(xpath.trim())) {
xpath = "/w:document/w:body[1]/w:p[1]";
}
addCommentRefToParaForXPath(documentDom, commentId, xpath);
}
}
/**
* Given a set of validation messages and a DOCX file to which those messages apply,
* creates a Word comment for each message, attached either to the paragraph the
* message points to (by XPath) or to the first paragraph of the document if there
* is not XPath for the message.
* @param docxFile The DOCX file to be updated.
* @param newDocxFile New DOCX file that will be a copy of the input DOCX with comments added.
* @param logDoc The messages document as a DOM.
* @throws ZipException
* @throws IOException
* @throws BosMemberValidationException
* @throws DomException
* @throws Exception
* @throws XPathExpressionException
* @throws FileNotFoundException
*/
public static void addValidationMessagesToDocxFile(File docxFile,
File newDocxFile, Document logDoc) throws ZipException,
IOException, BosMemberValidationException, DomException, Exception,
XPathExpressionException, FileNotFoundException {
String[] catalogs = new String[0];
Document documentDom = null;
Document commentsDom = null;
Map<URI, Document> domCache = new HashMap<URI, Document>();
BosConstructionOptions bosOptions = new BosConstructionOptions(log, domCache);
bosOptions.setCatalogs(catalogs);
ZipFile docxZip = new ZipFile(docxFile);
ZipComponents zipComponents = new ZipComponents(docxZip);
ZipComponent documentXml = zipComponents.getEntry(DocxConstants.DOCUMENT_XML_PATH);
// Load comments template doc:
URL commentsTemplateUrl = DocxConstants.class.getResource("resources/comments.xml");
Element commentTemplate = Word2DitaValidationHelper.getCommentTemplate(commentsTemplateUrl, bosOptions);
commentsDom = Word2DitaValidationHelper.getCommentsDom(bosOptions, zipComponents, commentsTemplateUrl);
documentDom = zipComponents.getDomForZipComponent(bosOptions, DocxConstants.DOCUMENT_XML_PATH);
addMessagesToDocxXml(logDoc, documentDom, commentsDom, commentTemplate);
Word2DitaValidationHelper.saveDomToZipComponent(documentDom, documentXml);
ZipComponent comments = zipComponents.getEntry(DocxConstants.COMMENTS_XML_PATH);
if (comments == null) {
comments = zipComponents.createZipComponent(DocxConstants.COMMENTS_XML_PATH);
}
// System.out.println("[1] Comments.xml: " + IOUtils.toString(DomUtil.serializeToInputStream(commentsDom)));
Word2DitaValidationHelper.saveDomToZipComponent(commentsDom, zipComponents.getEntry(DocxConstants.COMMENTS_XML_PATH));
Word2DitaValidationHelper.addCommentFileRelationship(zipComponents, bosOptions);
Word2DitaValidationHelper.addCommentFileContentType(zipComponents, bosOptions);
Word2DitaValidationHelper.saveZipComponents(zipComponents, newDocxFile);
}
/**
* @return
* @throws IOException
* @throws DomException
* @throws BosMemberValidationException
*/
static Element getCommentTemplate(URL commentsUrl, BosConstructionOptions bosOptions) throws IOException, BosMemberValidationException, DomException {
InputSource commentsTemplateXmlSource = new InputSource(commentsUrl.openStream());
commentsTemplateXmlSource.setSystemId(DocxConstants.COMMENTS_XML_PATH);
Document commentsTemplateDom = DomUtil.getDomForSource(commentsTemplateXmlSource, bosOptions, false, false);
NodeList comments = commentsTemplateDom.getDocumentElement()
.getElementsByTagNameNS(DocxConstants.nsByPrefix.get("w"), "comment");
Element commentTemplate = (Element)comments.item(0);
return commentTemplate;
}
/**
* @param bosOptions
* @param docxZip
* @param commentsTemplateUrl
* @return
* @throws Exception
*/
static Document getCommentsDom(BosConstructionOptions bosOptions,
ZipComponents zipComponents, URL commentsTemplateUrl)
throws Exception {
Document commentsDom;
NodeList comments;
ZipComponent commentsXml = zipComponents.getEntry(DocxConstants.COMMENTS_XML_PATH);
if (commentsXml == null) {
System.err.println("No comments.xml file");
commentsXml = zipComponents.createZipComponent(DocxConstants.COMMENTS_XML_PATH);
// Use the template as the base for new comments.xml DOM:
InputSource templateSource = new InputSource(commentsTemplateUrl.openStream());
templateSource.setSystemId(commentsTemplateUrl.toExternalForm());
commentsDom = DomUtil.getDomForSource(templateSource, bosOptions, false, false);
comments = commentsDom.getDocumentElement()
.getElementsByTagNameNS(DocxConstants.nsByPrefix.get("w"), "comment");
// Remove any existing comments that were in the template:
for (int i = 0; i < comments.getLength(); i++) {
Element comment = (Element)comments.item(i);
commentsDom.getDocumentElement().removeChild(comment);
}
zipComponents.createZipComponent(DocxConstants.COMMENTS_XML_PATH, commentsDom);
} else {
commentsDom = zipComponents.getDomForZipComponent(bosOptions, DocxConstants.COMMENTS_XML_PATH);
}
return commentsDom;
}
/**
* @param doc
* @param zipComponent
* @throws IOException
* @throws Exception
*/
static void saveDomToZipComponent(Document doc,
ZipComponent zipComponent) throws IOException, Exception {
if (zipComponent == null) {
throw new IOException("zipComponent is null");
}
zipComponent.setDom(doc);
}
/**
* @param commentsDom
* @param commentTemplate
* @param messageText
* @param commentId
*/
static void addCommentToComments(Document commentsDom,
Element commentTemplate, String messageText, String commentId) {
Element comment = (Element)commentsDom.importNode(commentTemplate, true);
commentsDom.getDocumentElement().appendChild(comment);
comment.setAttributeNS(wNs, "w:id", commentId);
comment.setAttributeNS(wNs, "w:author", "XML Validator");
comment.setAttributeNS(wNs, "w:initials", "XMLVal");
comment.setAttributeNS(wNs, "w:date", timestampFormatter.format(Calendar.getInstance().getTime()));
Element elem = DataUtil.getElementNS(comment, wNs, "p");
NodeList nl = elem.getElementsByTagNameNS(wNs, "r");
elem = (Element)nl.item(nl.getLength() - 1);
Element text = DataUtil.getElementNS(elem, wNs, "t");
text.setTextContent(messageText);
}
/**
* @param documentDom
* @param xpath
* @return
* @throws XPathExpressionException
*/
static Node getWordParaForXPath(Document documentDom, String xpath)
throws XPathExpressionException {
XPathFactory xpathFactory = DomUtil.getXPathFactory();
XPath xpathObj = xpathFactory.newXPath();
xpathObj.setNamespaceContext(DocxConstants.docxNamespaceContext);
Object result = xpathObj.evaluate(xpath, documentDom, XPathConstants.NODE);
Node node = null;
if(result!=null) {
node = (Node) result;
}
return node;
}
/**
* @param documentDom
* @param commentId
* @param xpath
* @throws XPathExpressionException
*/
static void addCommentRefToParaForXPath(Document documentDom,
String commentId, String xpath) throws XPathExpressionException {
/**
<w:r>
<w:rPr>
<w:rStyle
w:val="CommentReference"/>
</w:rPr>
<w:commentReference
w:id="14"/>
</w:r>
*/
Node node = getWordParaForXPath(documentDom, xpath);
Element p = (Element)node;
Element commentRef = documentDom.createElementNS(wNs, "w:r");
Element elem = (Element)commentRef.appendChild(documentDom.createElementNS(wNs, "w:rPr"));
elem = (Element)elem.appendChild(documentDom.createElementNS(wNs, "w:rStyle"));
elem.setAttributeNS(wNs, "w:val", "CommentReference");
elem = (Element)commentRef.appendChild(documentDom.createElementNS(wNs, "w:commentReference"));
elem.setAttributeNS(wNs, "w:id", commentId);
p.appendChild(commentRef);
}
/**
* @param pkg
* @param bosOptions
* @throws Exception
*/
static void addCommentFileRelationship(ZipComponents zipComponents,
BosConstructionOptions bosOptions) throws Exception {
ZipComponent comp = zipComponents.getEntry(DocxConstants.DOCUMENT_XML_RELS_PATH);
Document doc = zipComponents.getDomForZipComponent(bosOptions, comp);
Element docElem = doc.getDocumentElement();
NodeList nl = docElem.getElementsByTagNameNS(DocxConstants.RELS_NS, "Relationship");
boolean foundCommentRel = false;
for (int i =0; i < nl.getLength(); i++) {
Element elem = (Element)nl.item(i);
String type = elem.getAttribute("Type");
if (DocxConstants.COMMENT_REL_TYPE.equals(type)) {
foundCommentRel = true;
break;
}
}
if (!foundCommentRel) {
Element elem = doc.createElementNS(DocxConstants.RELS_NS, "Relationship");
elem.setAttribute("Type", DocxConstants.COMMENT_REL_TYPE);
elem.setAttribute("Id", "rId" + (nl.getLength() + 1));
elem.setAttribute("Target", "comments.xml");
docElem.appendChild(elem);
// System.out.println(IOUtils.toString(DomUtil.serializeToInputStream(doc, "utf-8")));
comp.setDom(doc);
}
}
/**
* Validates an XML document, capturing the messages into an XML document that includes
* any @xtrc values pointing back into the original DOCX file from which the XML was
* generated. The resulting document can be used to then annotate the original DOCX
* file with messages bound to the original source paragraphs.
* @param messageFile The file to hold the XML message log.
* @param inputUrl The URL of the document to be validated.
* @param catalogs List of entity resolution catalogs to be used by the parser (as for the Resolver class).
* @return DOM document containing the log messages. Also saves the messages to the specified file.
* @throws IOException
* @throws ParserConfigurationException
* @throws Exception
* @throws SAXException
* @throws FileNotFoundException
*/
public static Document validateXml(File messageFile, URL inputUrl, String[] catalogs)
throws IOException, ParserConfigurationException, Exception,
SAXException, FileNotFoundException {
InputSource source = new InputSource(inputUrl.openStream());
Document logDoc = DomUtil.getNewDom();
XMLReader reader = SaxUtil.getXMLFormatLoggingXMLReader(log, logDoc, true, catalogs);
reader.parse(source);
InputStream logStream = DomUtil.serializeToInputStream(logDoc, "utf-8");
System.out.println("Creating message file \"" + messageFile.getAbsolutePath() + "\"...");
OutputStream fos = new FileOutputStream(messageFile);
IOUtils.copy(logStream, fos);
return logDoc;
}
/**
* @param zipComponents
* @param bosOptions
* @throws Exception
*/
public static void addCommentFileContentType(ZipComponents zipComponents,
BosConstructionOptions bosOptions) throws Exception {
/*
* <Override
PartName="/word/comments.xml"
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>
*/
ZipComponent comp = zipComponents.getEntry("[Content_Types].xml");
Document doc = zipComponents.getDomForZipComponent(bosOptions, comp);
Element docElem = doc.getDocumentElement();
String contentTypesNs = "http://schemas.openxmlformats.org/package/2006/content-types";
NodeList nl = docElem.getElementsByTagNameNS(contentTypesNs, "Override");
boolean foundCommentType = false;
for (int i =0; i < nl.getLength(); i++) {
Element elem = (Element)nl.item(i);
String partName = elem.getAttribute("PartName");
if (DocxConstants.COMMENTS_PARTNAME.equals(partName)) {
foundCommentType = true;
break;
}
}
if (!foundCommentType) {
Element elem = doc.createElementNS(contentTypesNs, "Override");
elem.setAttribute("PartName", DocxConstants.COMMENTS_PARTNAME);
elem.setAttribute("ContentType", DocxConstants.COMMENTS_CONTENT_TYPE);
docElem.appendChild(elem);
comp.setDom(doc);
}
}
/**
* @param documentDom
* @param commentsDom
* @param docxZip
* @param zipFile
* @throws FileNotFoundException
* @throws IOException
* @throws Exception
*/
public static void saveZipComponents(ZipComponents zipComponents, File zipFile) throws FileNotFoundException,
IOException, Exception {
ZipOutputStream zipOutStream = new ZipOutputStream(new FileOutputStream(zipFile));
for (ZipComponent comp : zipComponents.getComponents()) {
ZipEntry newEntry = new ZipEntry(comp.getName());
zipOutStream.putNextEntry(newEntry);
if (comp.isDirectory()) {
// Nothing to do.
} else {
// System.out.println(" + [DEBUG] saving component \"" + comp.getName() + "\"");
if (comp.getName().endsWith("document.xml") || comp.getName().endsWith("document.xml.rels")) {
// System.out.println("Handling a file of interest.");
}
InputStream inputStream = comp.getInputStream();
IOUtils.copy(inputStream, zipOutStream);
inputStream.close();
}
}
zipOutStream.close();
}
}