/* * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package org.xwiki.store.serialization.xml.internal; import java.io.FilterWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.util.Stack; import org.apache.commons.codec.binary.Base64OutputStream; import org.apache.commons.io.IOUtils; import org.apache.commons.io.output.CloseShieldOutputStream; import org.dom4j.Document; import org.dom4j.Element; import org.dom4j.io.OutputFormat; /** * Extension to <code>{@link org.dom4j.io.XMLWriter}</code> to allow filling some element content * with an input stream, minimizing the memory footprint of the operation. * <p> * This extension is not intended to be used to format a DOM4J tree to a stream, but to immediately * write out the tags produced without building the document tree in memory. It is not compatible * with the SAX part of the original * <code>{@link org.dom4j.io.XMLWriter}</code>. * </p> * <p> * An improvement to the writeOpen/writeClose functions ensure better handling of independent * opening and closing of tags by maintaining a state stack of opened tags. * New writeDocumentStart/End function also ensure proper starting and * ending of the document it self. * </p> * * @version $Id: af9502c87eb12180b0a6811a389cea408beb94ad $ * @since 3.0M2 */ public class XMLWriter extends org.dom4j.io.XMLWriter { /** * Number of characters wide base64 content will be. */ private static final int BASE64_WIDTH = 80; /** * Platform dependent line seperator. */ private static final byte[] NEWLINE; /** * If the last character written is this then it is safe to indent the next tag. */ private static final char CLOSE_ANGLE_BRACKET = '>'; /** * <code>{@link Stack}</code> of currently opened <code>{@link Element}</code>, the first * <code>{@link Element}</code> is the document root element, * and the top of the stack is the last opened * <code>{@link Element}</code>. */ protected Stack<Element> parent = new Stack<Element>(); /** * Current <code>{@link OutputStream}</code> of this writer. */ private OutputStream out; /** * The underlying writer which is not cast to Writer. */ private LastCharWriter lcWriter; /** True if the last thing written was content from an InputStream and private boolean indentUnsafe; /** Need to catch this exception so this has to be done in an initializer block. */ static { try { NEWLINE = System.getProperty("line.separator").getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException("No UTF-8, this Java VM is not standards compliant!", e); } } /** * Default constructor used by <code>{@link DOMXMLWriter}</code>. * * @see DOMXMLWriter */ protected XMLWriter() { } /** * Create a new XMLWriter writing to a provided OutputStream in a given format. * Note that other constructor of the original DOM4J XMLWriter are unsupported since an * OutputStream is the only way we can support the extensions provided here. * <p> * Note that the writer is buffered and only a call to flush() or writeDocuemntEnd() * will ensure the output has been fully written to the <code>{@link OutputStream}</code>. * </p> * * @param out an <code>{@link OutputStream}</code> where to output the XML produced. * @param format an <code>{@link OutputFormat}</code> defining the encoding that * should be used and esthetics of the produced XML. * @throws UnsupportedEncodingException the requested encoding is unsupported. */ public XMLWriter(final OutputStream out, final OutputFormat format) throws UnsupportedEncodingException { super(out, format); this.lcWriter = new LastCharWriter(super.writer); super.writer = this.lcWriter; this.out = out; } /** * Write the <code>{@link Document}</code> declaration, and its <code>{@link DocumentType}</code> * if available to the output stream. * * @param doc <code>{@link Document}</code> to be started, may specify a * <code>{@link DocumentType}</code>. * @throws IOException a problem occurs during writing */ public void writeDocumentStart(final Document doc) throws IOException { writeDeclaration(); if (doc.getDocType() != null) { super.indent(); super.writeDocType(doc.getDocType()); } } /** * Write the end of the document. * Close all remaining opened <code>{@link Element}</code> including the root element to * terminate the current document. * Also flush the writer to ensure the whole document has been written to the * <code>{@link OutputStream}</code>. * * @param doc <code>{@link Document}</code> to be end, actually unused. * @throws IOException a problem occurs during writing. */ public void writeDocumentEnd(final Document doc) throws IOException { if (!this.parent.isEmpty()) { this.writeClose(this.parent.firstElement()); } super.writePrintln(); super.flush(); } /** * Writes the <code>{@link Element}</code>, including its <code>{@link * Attribute}</code>s, using the <code>{@link Reader}</code> * for its content. * <p> * Note that proper decoding/encoding will occurs during this operation, * converting the encoding of the Reader into the encoding of the Writer. * </p> * * @param element <code>{@link Element}</code> to output. * @param rd <code>{@link Reader}</code> that will be fully read and transfered * into the element content. * @throws IOException a problem occurs during reading or writing. */ public void write(final Element element, final Reader rd) throws IOException { this.writeOpen(element); IOUtils.copy(rd, this.lcWriter); this.writeClose(element); } /** * Writes the <code>{@link Element}</code>, including its <code>{@link * Attribute}</code>s, using the * <code>{@link InputStream}</code> for its content. * <p> * Note that no decoding/encoding of the InputStream will be ensured during this operation. * The byte content is transfered untouched. * </p> * * @param element <code>{@link Element}</code> to output. * @param is <code>{@link InputStream}</code> that will be fully read and transfered into * the element content. * @throws IOException a problem occurs during reading or writing. */ public void write(final Element element, final InputStream is) throws IOException { this.writeOpen(element); super.flush(); IOUtils.copy(is, this.out); // We must prevent indentation even though the // last character written through the writer is a > super.writeClose(element); } /** * Writes the <code>{@link Element}</code>, including its <code>{@link * Attribute}</code>s, using the * <code>{@link InputStream}</code> encoded in Base64 for its content. * * @param element <code>{@link Element}</code> to output. * @param is <code>{@link InputStream}</code> that will be fully read and encoded * in Base64 into the element content. * @throws IOException a problem occurs during reading or writing. */ public void writeBase64(final Element element, final InputStream is) throws IOException { this.writeOpen(element); super.writePrintln(); super.flush(); final Base64OutputStream base64 = new Base64OutputStream(new CloseShieldOutputStream(this.out), true, BASE64_WIDTH, NEWLINE); IOUtils.copy(is, base64); base64.close(); // The last char written was a newline, not a > so it will not indent unless it is done manually. super.setIndentLevel(this.parent.size() - 1); super.indent(); this.writeClose(element); } /** * Writes the opening tag of an {@link Element}. * Includes its {@link Attribute}s but without its content. * <p> * Compared to the DOM4J implementation, this function keeps track of opened elements. * </p> * * @param element <code>{@link Element}</code> to output. * @throws IOException a problem occurs during writing. * @see org.dom4j.io.XMLWriter#writeOpen(org.dom4j.Element) */ @Override public void writeOpen(final Element element) throws IOException { if (this.lcWriter.getLastChar() == CLOSE_ANGLE_BRACKET) { super.writePrintln(); super.indent(); } super.writeOpen(element); this.parent.push(element); super.setIndentLevel(this.parent.size()); } /** * Writes the closing tag of an {@link Element}. * <p> * Compared to the DOM4J implementation, this function ensure closing of all opened * element including the one that is requested to be closed. Also writes a newline and * indents the closing element if required and if the last thing written was not a string. * </p> * * @param element <code>{@link Element}</code> to output. * @throws IOException a problem occurs during writing. * @see org.dom4j.io.XMLWriter#writeClose(org.dom4j.Element) */ @Override public void writeClose(final Element element) throws IOException { while (!this.parent.peek().getQualifiedName().equals(element.getQualifiedName())) { this.writeClose(this.parent.peek()); } super.setIndentLevel(this.parent.size() - 1); if (this.lcWriter.getLastChar() == CLOSE_ANGLE_BRACKET) { super.writePrintln(); super.indent(); } super.writeClose(this.parent.pop()); } /** * An OutputStream which allows you to get the last byte which was written to it. */ private static class LastCharWriter extends FilterWriter { /** * The last byte written to the stream. */ private char lastChar; /** * The Constructor. * * @param toWrap the Writer to send all calls to. */ LastCharWriter(final Writer toWrap) { super(toWrap); } @Override public void write(final char[] buffer, final int offset, final int count) throws IOException { super.write(buffer, offset, count); this.lastChar = buffer[offset + count - 1]; } @Override public void write(final String str, final int offset, final int count) throws IOException { super.write(str, offset, count); this.lastChar = str.charAt(offset + count - 1); } @Override public void write(final int oneChar) throws IOException { super.write(oneChar); this.lastChar = (char) oneChar; } /** * @return the last character written. */ public char getLastChar() { return lastChar; } } }