package org.krakenapps.docxcod; import static org.krakenapps.docxcod.util.XMLDocHelper.evaluateXPath; import static org.krakenapps.docxcod.util.XMLDocHelper.evaluateXPathExpr; import static org.krakenapps.docxcod.util.XMLDocHelper.newDocumentBuilder; import static org.krakenapps.docxcod.util.XMLDocHelper.newXPath; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import org.krakenapps.docxcod.util.CloseableHelper; import org.krakenapps.docxcod.util.XMLDocHelper; import org.krakenapps.docxcod.util.XMLDocHelper.NodeListWrapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class MergeFieldParser implements OOXMLProcessor { private Logger logger = LoggerFactory.getLogger(getClass().getName()); public void process(OOXMLPackage pkg, Map<String, Object> rootMap) { /* * extract contents from merge fields and make magic node containing * them in proper position. */ extractMergeField(pkg); } private void extractMergeField(OOXMLPackage pkg) throws TransformerFactoryConfigurationError { InputStream f = null; try { f = new FileInputStream(new File(pkg.getDataDir(), "word/document.xml")); Document doc = newDocumentBuilder().parse(f); XPath xpath = newXPath(doc); NodeList nodeList = evaluateXPath(xpath, "//*[name()='w:fldChar' or name()='w:instrText' or name()='w:fldSimple']", doc); List<Directive> directives = DirectiveExtractor.parseNodeList(nodeList); for (Directive d : directives) { Node n = d.getPosition(); String directive = d.getDirectiveString(); logger.debug("{} {}", new Object[] { n.getNodeName(), directive }); MakeMagicNode(doc, n, directive); } XMLDocHelper.save(doc, new File(pkg.getDataDir(), "word/document.xml"), true); } catch (Exception e) { e.printStackTrace(); } finally { CloseableHelper.safeClose(f); } } private void MakeMagicNode(Document doc, Node n, String directive) { /* * move all nodes in fldSimple to out of it. and replace text contents * of <w:t> with KMagicNode */ if (n.getNodeName().equals("w:fldSimple")) { /* // @formatter:off <w:fldSimple w:instr="MERGEFIELD "@before-row#list .vars[\"disk-usage-summary\"] as u" \* MERGEFORMAT"> <w:r w:rsidR="00C47145"> <w:rPr> <w:noProof /> </w:rPr> <w:t>«@before-row#list .vars["disk-usage-summa»</w:t> </w:r> </w:fldSimple> */ // @formatter:on logger.debug("fldSimple found"); XPath xpath = newXPath(doc); XPathExpression xpFldSimpleText; try { xpFldSimpleText = xpath.compile("w:r/w:t"); NodeList t = evaluateXPathExpr(xpFldSimpleText, n); t.item(0).setTextContent(""); t.item(0).appendChild(getMagicNode(doc, directive)); } catch (XPathExpressionException e) { // TODO Auto-generated catch block e.printStackTrace(); } // n : w:fldSimple can contain many w:r in its children Node parent = n.getParentNode(); for (Node c : new NodeListWrapper(n.getChildNodes())) { if (c.getNodeName() != null) parent.insertBefore(c.cloneNode(true), n); } parent.removeChild(n); } else if (n.getNodeName().equals("w:fldChar")) { // @formatter:off /* <w:r> <w:fldChar w:fldCharType="begin" /> </w:r> <w:r> <w:instrText xml:space="preserve">MERGEFIELD @after-row#/list \* MERGEFORMAT</w:instrText> </w:r> <w:r> <w:fldChar w:fldCharType="separate" /> </w:r> <w:r> <!-- style of this run will be used --> <w:rPr> <w:noProof /> </w:rPr> <w:t>«@after-row#/list»</w:t> </w:r> <w:r> <w:rPr> <w:noProof /> </w:rPr> <w:fldChar w:fldCharType="end" /> </w:r> */ // @formatter:on Node firstRun = n.getParentNode(); Node sibling = firstRun.getNextSibling(); Node lastRun = null; Node newRun = null; ArrayList<Node> willBeRemoved = new ArrayList<Node>(); willBeRemoved.add(firstRun); while (sibling != null) { if (sibling.getNodeName().equals("w:r")) { // all nodes in w:fldChar except 'newRun' will be removed // finally. willBeRemoved.add(sibling); Node fldCharNode = findFldCharNode(sibling); if (fldCharNode == null) { sibling = sibling.getNextSibling(); continue; } NamedNodeMap attributes = fldCharNode.getAttributes(); Node namedItem = attributes.getNamedItem("w:fldCharType"); if (namedItem == null) { sibling = sibling.getNextSibling(); continue; } if (namedItem.getNodeValue().equals("separate")) { sibling = sibling.getNextSibling(); // newRun will not be removed newRun = sibling; // skip whitespace elements and find first w:r. while (!newRun.getNodeName().equals("w:r")) { newRun = newRun.getNextSibling(); } // replace contents of first w:r. so formating style of // newRun will preserved. Node textNode = findTextNode(newRun); if (textNode == null) { logger.warn("no text-containing run element found with directive. skipped. : {}", directive); continue; } textNode.setTextContent(""); textNode.appendChild(getMagicNode(doc, directive)); continue;// live } if (namedItem.getNodeValue().equals("end")) { lastRun = sibling; break; } } sibling = sibling.getNextSibling(); } willBeRemoved.remove(newRun); if (lastRun != null) { // found matching "end" fldChar Node parentNode = firstRun.getParentNode(); for (Node node : willBeRemoved) { parentNode.removeChild(node); } } else { logger.warn("no matching \"end\" fldChar found"); } } } private static Pattern MAGICNODE_PATTERN = Pattern.compile("<KMagicNode><![CDATA[+(.*)+]]></KMagicNode>"); private static String parseMagicNode(String in) { in = replaceUnicodeQuote(in.trim()); Matcher matcher = MAGICNODE_PATTERN.matcher(in); if (matcher.find() && matcher.groupCount() > 0) { String f = matcher.group(1); if (f == null) f = matcher.group(2); f = f.replaceAll("\\\\(.)", "$1"); return f; } else return null; } private static String replaceUnicodeQuote(String in) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < in.length(); ++i) { int type = Character.getType(in.codePointAt(i)); switch (type) { case Character.FINAL_QUOTE_PUNCTUATION: case Character.INITIAL_QUOTE_PUNCTUATION: builder.append('"'); break; default: builder.append(in.charAt(i)); break; } } return builder.toString(); } // unused but leave here for reference @SuppressWarnings("unused") private static String transformMagicNode(String nodeValue) { String result = nodeValue.trim(); if (result.startsWith("<KMagicNode>")) { result = parseMagicNode(result); } return result; } public static final String UTF8_BOM = "\uFEFF"; private Node findFldCharNode(Node sibling) { for (Node n : new NodeListWrapper(sibling.getChildNodes())) { if (n.getNodeName().equals("w:fldChar")) { return n; } } return null; } private Node findTextNode(Node sibling) { for (Node n : new NodeListWrapper(sibling.getChildNodes())) { if (n.getNodeName().equals("w:t")) { return n; } } return null; } private Node getMagicNode(Document doc, String content) { Element magicNode = doc.createElement("KMagicNode"); magicNode.appendChild(doc.createCDATASection(content)); return magicNode; } }