package org.krakenapps.docxcod; import static org.krakenapps.docxcod.util.XMLDocHelper.evaluateXPath; import static org.krakenapps.docxcod.util.XMLDocHelper.newDocumentBuilder; import static org.krakenapps.docxcod.util.XMLDocHelper.newXPath; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PrintWriter; import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.Stack; import java.util.zip.ZipOutputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpressionException; import org.apache.commons.io.FilenameUtils; import org.krakenapps.docxcod.util.XMLDocHelper.NodeListWrapper; import org.krakenapps.docxcod.util.ZipHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; public class OOXMLPackage { private Logger logger = LoggerFactory.getLogger(getClass().getName()); private File dataDir = null; private Relationship rootRel = null; public OOXMLPackage() { } public File getDataDir() { return dataDir; } public void attach(File targetDir) { if (this.dataDir != null) throw new AlreadyAttachedException(targetDir.getAbsolutePath()); this.dataDir = targetDir; parseRels(); } public void load(InputStream is, File targetDir) throws IOException { if (this.dataDir != null) throw new AlreadyAttachedException(targetDir.getAbsolutePath()); this.dataDir = targetDir; try { ZipHelper.extract(is, dataDir); tidyXMLs(); parseRels(); } catch (IOException e) { throw e; } finally { try { if (is != null) is.close(); } catch (IOException e) { e.printStackTrace(); // ignore } } } private void parseRels() { File rootRelDir = new File(dataDir, "_rels"); File rootRelFile = new File(rootRelDir, ".rels"); Stack<SimpleEntry<File, Relationship>> remaining = new Stack<SimpleEntry<File, Relationship>>(); Set<Relationship> relationships = new HashSet<Relationship>(); rootRel = new Relationship(); remaining.push(new SimpleEntry<File, Relationship>(rootRelFile, rootRel)); try { while (!remaining.empty()) { SimpleEntry<File, Relationship> cur = remaining.pop(); File curFile = cur.getKey(); if (!curFile.exists()) continue; Document doc = newDocumentBuilder().parse(curFile); logger.trace("Parsing: " + curFile); Relationship parent = cur.getValue(); try { XPath xpath = newXPath(doc); NodeList nodeList = evaluateXPath(xpath, "//DEF:Relationship", doc); for (Node n : new NodeListWrapper(nodeList)) { NamedNodeMap attrs = n.getAttributes(); Relationship rel = new Relationship(parent, attrs); parent.children.add(rel); relationships.add(rel); remaining.push( new SimpleEntry<File, Relationship>( makeRelFile(curFile, rel.target), rel)); } } catch (XPathExpressionException e) { logger.warn("invalid rels document: " + curFile); } finally { } } ByteArrayOutputStream out = new ByteArrayOutputStream(1024); // printRelationship(rootRel, new PrintWriter(out)); logger.debug(rootRel.toSummaryString()); } catch (SAXException e) { logger.error("invalid XML doc", e); } catch (IOException e) { logger.error("exception while parsing docx rels", e); } catch (ParserConfigurationException e) { logger.error("exception while parsing docx rels", e); } } private void printRelationship(Relationship rootRel, PrintWriter writer) { Stack<SimpleEntry<Integer, Relationship>> r = new Stack<SimpleEntry<Integer, Relationship>>(); r.push(new SimpleEntry<Integer, Relationship>(0, rootRel)); while (!r.empty()) { SimpleEntry<Integer, Relationship> entry = r.pop(); int depth = entry.getKey(); Relationship rel = entry.getValue(); String prefix = ""; while (depth-- > 0) prefix += "\t"; writer.println(prefix + rel.toString()); for (Relationship c : rel.children) { r.push(new SimpleEntry<Integer, Relationship>(entry.getKey() + 1, c)); } } writer.flush(); } private File makeRelFile(File curFile, String path) { File parent = curFile.getParentFile().getParentFile(); File target = new File(parent, path); File relsParent = target.getParentFile(); File rels = new File(relsParent, "_rels"); return new File(rels, target.getName() + ".rels"); } public void load(InputStream is) throws IOException { try { File tempDir = File.createTempFile("KrakenDocxcodData_", ""); tempDir.delete(); tempDir.mkdirs(); load(is, tempDir); } catch (IOException e) { throw e; } } private void tidyXMLs() { ArrayList<File> files = new ArrayList<File>(); ZipHelper.getFilesRecursivelyIn(dataDir, files); files = filterXMLfiles(files); for (File f : files) { tidyXML(f); } } private void tidyXML(File f) { Document doc; try { doc = newDocumentBuilder().parse(f); if (doc == null) return; Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); transformer.transform(new DOMSource(doc), new StreamResult(f)); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (TransformerConfigurationException e) { e.printStackTrace(); } catch (TransformerFactoryConfigurationError e) { e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } } private ArrayList<File> filterXMLfiles(ArrayList<File> files) { ArrayList<File> result = new ArrayList<File>(); for (File f : files) { String n = f.getName(); n = n.toUpperCase(); if (n.endsWith(".XML") || n.endsWith(".RELS")) result.add(f); } return result; } public void save(OutputStream os) { ZipOutputStream zipOs = null; try { parseRels(); zipOs = new ZipOutputStream(os); List<File> files = new ArrayList<File>(); files.add(new File(dataDir, "[Content_Types].xml")); String[] listParts = listParts(""); final Set<String> setOfParts = new HashSet<String>(Arrays.asList(listParts)); for (String part : listParts) { files.add(new File(dataDir, part)); } ZipHelper.getFilesRecursivelyIn(dataDir, files, new FileFilter() { @Override public boolean accept(File pathname) { if (pathname.getName().equals(".rels") || isRelsForParts(pathname)) return true; else return false; } private boolean isRelsForParts(File pathname) { String path = pathname.getPath(); if (path.endsWith(".rels")) { String relParent = path.substring(0, path.length() - ".rels".length()); relParent = relParent.replace("_rels" + File.separator, ""); return setOfParts.contains(ZipHelper.extractSubPath(new File(relParent), dataDir)); } return false; } }); ZipHelper.archive(zipOs, files, dataDir); zipOs.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (zipOs != null) { try { zipOs.close(); } catch (IOException e) { e.printStackTrace(); // ignore } } } } public String addPart(String string, String string2) { return ""; } public String getRelationId(String string) { // TODO Auto-generated method stub return null; } public String findPart(String string, String string2) { // TODO Auto-generated method stub return null; } public Relationship getRootRelationship() { return rootRel; } public String[] listParts(String prefix) { if (rootRel == null) return new String[0]; ArrayList<String> result = new ArrayList<String>(); Stack<Object[]> s = new Stack<Object[]>(); s.push(new Object[] { rootRel, "" }); while (!s.empty()) { Object[] args = s.pop(); Relationship currRel = (Relationship) args[0]; String currPrefix = (String) args[1]; for (Relationship r : currRel.children) { String combinedPath = FilenameUtils.concat(currPrefix, r.target); if (combinedPath.startsWith(prefix)) result.add(combinedPath); s.push(new Object[] { r, FilenameUtils.normalize(FilenameUtils.getFullPath(combinedPath)) }); } } logger.debug("result : " + result); return (String[]) result.toArray(new String[0]); } public static void main(String[] args) { FileOutputStream os = null; FileOutputStream chartOs = null; try { OOXMLPackage pkg = new OOXMLPackage(); OOXMLPackage chart = null; pkg.attach(new File(args[0])); chart = new OOXMLPackage(); File xlsxExtracted = new File(args[0], "word/embeddings/Microsoft_Excel_____1.xlsx.extracted"); if (xlsxExtracted.exists()) { chartOs = new FileOutputStream(new File(args[0], "word/embeddings/Microsoft_Excel_____1.xlsx")); chart.attach(xlsxExtracted); chart.save(chartOs); } os = new FileOutputStream(new File(args[0] + "_mods.docx")); pkg.save(os); System.out.println(args[0] + "_mods.docx saved."); } catch (FileNotFoundException e) { e.printStackTrace(); } finally { if (os != null) try { os.close(); } catch (IOException e) { } if (chartOs != null) try { chartOs.close(); } catch (IOException e) { } } } }