/* Aalto XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.out;

import java.io.*;
import java.text.MessageFormat;

import javax.xml.stream.*;

import org.codehaus.stax2.ri.typed.AsciiValueEncoder;

import com.fasterxml.aalto.impl.ErrorConsts;
import com.fasterxml.aalto.impl.IoStreamException;
import com.fasterxml.aalto.util.CharsetNames;
import com.fasterxml.aalto.util.XmlChars;
import com.fasterxml.aalto.util.XmlConsts;

/**
 * Base class for output type / encoding-specific serializers
 * used to do actual physical output of serialized xml content.
 * At this level, no namespace handling is done, and only those
 * checks directly related to encoding (including optional validity
 * checks for xml content) are implemented.
 */
public abstract class XmlWriter
    extends WNameFactory
{
    /** First code unit of the first (high) surrogate range. */
    protected final static int SURR1_FIRST = 0xD800;
    /** Last code unit of the first (high) surrogate range. */
    protected final static int SURR1_LAST = 0xDBFF;
    /** First code unit of the second (low) surrogate range. */
    protected final static int SURR2_FIRST = 0xDC00;
    /** Last code unit of the second (low) surrogate range. */
    protected final static int SURR2_LAST = 0xDFFF;

    // NOTE(review): not used in this class; presumably length thresholds
    // consulted by sub-classes when choosing a copy strategy -- confirm.
    protected final static int MIN_ARRAYCOPY = 12;

    protected final static int ATTR_MIN_ARRAYCOPY = 12;

    /** Length requested from the config when allocating {@link #_copyBuffer}. */
    protected final static int DEFAULT_COPYBUFFER_LEN = 512;

    /*
    /**********************************************************************
    /* Basic configuration
    /**********************************************************************
     */

    final protected WriterConfig _config;

    /**
     * Intermediate buffer, in which content (esp. Strings) can be
     * copied to, before being output. Set to null once
     * {@link #_releaseBuffers} has handed it back to the config.
     */
    protected char[] _copyBuffer;

    /** Length of {@link #_copyBuffer} as allocated in the constructor. */
    protected final int _copyBufferLen;

    /**
     * Indicates which xml version the output is to comply with:
     * if false, xml 1.0; if true, xml 1.1.
     */
    protected boolean _xml11 = false;

    protected final boolean _cfgNsAware;

    /*
    /**********************************************************************
    /* Output location info
    /**********************************************************************
     */

    /**
     * Number of characters output prior to currently buffered output
     */
    protected int _locPastChars = 0;

    protected int _locRowNr = 1;

    /**
     * Offset of the first character on this line. May be negative, if
     * the offset was in a buffer that has been flushed out.
     */
    protected int _locRowStartOffset = 0;

    /*
    ////////////////////////////////////////////////
    // Validation
    ////////////////////////////////////////////////
     */

    final protected boolean _checkContent;

    final protected boolean _checkNames;

    /*
    ///////////////////////////////////////////////////////
    // Life-cycle
    ///////////////////////////////////////////////////////
     */

    /**
     * Stores the configuration, obtains the copy buffer from it and
     * caches the namespace-awareness and validation settings.
     *
     * @param cfg Configuration to read settings and buffers from
     */
    protected XmlWriter(WriterConfig cfg)
    {
        _config = cfg;
        _copyBuffer = cfg.allocMediumCBuffer(DEFAULT_COPYBUFFER_LEN);
        // The allocator's result determines the usable length, so read it
        // back from the array rather than assuming the requested size.
        _copyBufferLen = _copyBuffer.length;
        _cfgNsAware = cfg.isNamespaceAware();
        _checkContent = cfg.willCheckContent();
        _checkNames = cfg.willCheckNames();
    }

    /*
    /**********************************************************************
    /* Abstract methods for WNameFactory
    /**********************************************************************
     */

    @Override
    public abstract WName constructName(String localName)
        throws XMLStreamException;

    @Override
    public abstract WName constructName(String prefix, String localName)
        throws XMLStreamException;

    /*
    /**********************************************************************
    /* Extra configuration
    /**********************************************************************
     */

    /**
     * Switches this writer to xml 1.1 mode by setting {@link #_xml11}.
     */
    public void enableXml11() {
        _xml11 = true;
    }

    /**
     * @return Current output pointer; used by {@link #getColumn} and
     *   {@link #getAbsOffset} to calculate location information.
     */
    protected abstract int getOutputPtr();

    /**
     * Method called by error reporting code, to figure out if a given
     * character is encodable (without using character entities) with
     * the current encoding or not.
     *
     * @return Character code of the highest character that can be
     *   natively encoded.
     */
    public abstract int getHighestEncodable();

    /*
    ////////////////////////////////////////////////////
    // Basic methods for communicating with underlying
    // stream or writer
    ////////////////////////////////////////////////////
     */

    /**
     * Method called to flush the buffer(s), and close the output
     * sink (stream or writer).
     *
     * @param forceTargetClose If true, the target is asked to close
     *   regardless of configuration; if false, it is only asked to close
     *   when the config has auto-closing of output enabled
     *
     * @throws IOException If flushing or closing the target fails
     */
    public final void close(boolean forceTargetClose) throws IOException
    {
        // NOTE(review): there is no try/finally here, so if flush() throws,
        // buffers are not released and the target is not closed.
        flush();
        _releaseBuffers();
        _closeTarget(forceTargetClose || _config.willAutoCloseOutput());
    }

    /**
     * Hands the copy buffer back to the config, if it has not been
     * handed back already.
     */
    public void _releaseBuffers()
    {
        char[] buf = _copyBuffer;
        if (buf != null) {
            // Clear the field first so the buffer is not returned twice
            _copyBuffer = null;
            _config.freeMediumCBuffer(buf);
        }
    }

    public abstract void _closeTarget(boolean doClose) throws IOException;

    public abstract void flush() throws IOException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, non-elem/attr, textual
    ////////////////////////////////////////////////////
     */

    /**
     * @param data Contents of the CDATA section to write out
     *
     * @return offset of the (first) illegal content segment ("]]&gt;") in
     *   passed content, if not in repairing mode; or -1 if none
     */
    public abstract int writeCData(String data)
        throws IOException, XMLStreamException;

    public abstract int writeCData(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException;

    public abstract void writeCharacters(String data)
        throws IOException, XMLStreamException;

    public abstract void writeCharacters(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException;

    public abstract void writeSpace(String data)
        throws IOException, XMLStreamException;

    public abstract void writeSpace(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException;

    /**
     * Method that will try to output the content as specified. If
     * the content passed in has embedded "--" in it, it will either
     * add an intervening space between consecutive hyphens (if content
     * fixing is enabled), or return the offset of the first hyphen in
     * multi-hyphen sequence.
     */
    public abstract int writeComment(String data)
        throws IOException, XMLStreamException;

    /**
     * Older "legacy" output method for outputting DOCTYPE declaration.
     * Assumes that the passed-in String contains a complete DOCTYPE
     * declaration properly quoted.
     */
    public abstract void writeDTD(String data)
        throws IOException, XMLStreamException;

    public abstract void writeDTD(WName rootName, String systemId, String publicId,
            String internalSubset)
        throws IOException, XMLStreamException;

    public abstract void writeEntityReference(WName name)
        throws IOException, XMLStreamException;

    public abstract int writePI(WName target, String data)
        throws IOException, XMLStreamException;

    public abstract void writeRaw(String str, int offset, int len)
        throws IOException, XMLStreamException;

    public abstract void writeRaw(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException;

    public abstract void writeXmlDeclaration(String version, String enc, String standalone)
        throws IOException, XMLStreamException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, elements
    ////////////////////////////////////////////////////
     */

    /**
     *<p>
     * Note: can throw XMLStreamException, if name checking is enabled,
     * and name is invalid (name check has to be in this writer, not
     * caller, since it depends not only on xml limitations, but also
     * on encoding limitations)
     */
    public abstract void writeStartTagStart(WName name)
        throws IOException, XMLStreamException;

    public abstract void writeStartTagEnd()
        throws IOException, XMLStreamException;

    public abstract void writeStartTagEmptyEnd()
        throws IOException, XMLStreamException;

    public abstract void writeEndTag(WName name)
        throws IOException, XMLStreamException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, attributes/ns, textual
    ////////////////////////////////////////////////////
     */

    /**
     *<p>
     * Note: can throw XMLStreamException, if name checking is enabled,
     * and name is invalid (name check has to be in this writer, not
     * caller, since it depends not only on xml limitations, but also
     * on encoding limitations)
     */
    public abstract void writeAttribute(WName name, String value)
        throws IOException, XMLStreamException;

    public abstract void writeAttribute(WName name, char[] value, int offset, int len)
        throws IOException, XMLStreamException;

    /*
    ////////////////////////////////////////////////////
    // Write methods, Typed
    ////////////////////////////////////////////////////
     */

    public abstract void writeTypedValue(AsciiValueEncoder enc)
        throws IOException, XMLStreamException;

    public abstract void writeAttribute(WName name, AsciiValueEncoder enc)
        throws IOException, XMLStreamException;

    /*
    ////////////////////////////////////////////////////
    // Location information
    ////////////////////////////////////////////////////
     */

    /**
     * @return Current value of the row counter (which starts at 1)
     */
    public int getRow() {
        return _locRowNr;
    }

    /**
     * @return 1-based column: distance of the current output pointer
     *   from the recorded start offset of the current row, plus one
     */
    public int getColumn() {
        return (getOutputPtr() - _locRowStartOffset) + 1;
    }

    /**
     * @return Number of characters output before the currently buffered
     *   output, plus the current output pointer
     */
    public int getAbsOffset() {
        return _locPastChars + getOutputPtr();
    }

    /*
    ////////////////////////////////////////////////////
    // Helper methods for sub-classes
    ////////////////////////////////////////////////////
     */

    /**
     * Method used to figure out which part of the Unicode char set the
     * encoding can natively support. Values returned are 7, 8 and 16,
     * to indicate (respectively) "ascii", "ISO-Latin" and "native Unicode".
     * These are just best guesses, but should work ok for the most common
     * encodings.
     *
     * @param cfg Configuration from which the preferred encoding is read
     *
     * @return 7, 8 or 16, as described above; 16 if no encoding is
     *   specified, 8 if the encoding is not one of the recognized ones
     */
    public final static int guessEncodingBitSize(WriterConfig cfg)
    {
        String enc = cfg.getPreferredEncoding();
        if (enc == null || enc.length() == 0) { // let's assume default is UTF-8...
            return 16;
        }
        // Let's see if we can find a normalized name, first:
        enc = CharsetNames.normalize(enc);

        /* Ok, first, do we have known ones; starting with most common.
         * Compared by value (constant first, so null-safe) instead of by
         * identity, so that the result does not depend on normalize()
         * returning the very same String instance as the constant.
         */
        if (CharsetNames.CS_UTF8.equals(enc)) {
            return 16; // meaning up to 2^16 can be represented natively
        } else if (CharsetNames.CS_ISO_LATIN1.equals(enc)) {
            return 8;
        } else if (CharsetNames.CS_US_ASCII.equals(enc)) {
            return 7;
        } else if (CharsetNames.CS_UTF16.equals(enc)
                || CharsetNames.CS_UTF16BE.equals(enc)
                || CharsetNames.CS_UTF16LE.equals(enc)
                || CharsetNames.CS_UTF32BE.equals(enc)
                || CharsetNames.CS_UTF32LE.equals(enc)) {
            return 16;
        }

        /* Above and beyond well-recognized names, it might still be
         * good to have more heuristics for as-of-yet unhandled cases...
         * But, it's probably easier to only assume 8-bit clean (could
         * even make it just 7, let's see how this works out)
         */
        return 8;
    }

    /**
     * This is the method called when an output method call violates
     * name well-formedness checks
     * and name validation is enabled.
     *
     * @throws XMLStreamException Always, with the given message
     */
    protected void reportNwfName(String msg)
        throws XMLStreamException
    {
        throwOutputError(msg);
    }

    /**
     * Variant of {@link #reportNwfName(String)} where the message is a
     * {@link MessageFormat} pattern taking a single argument.
     *
     * @throws XMLStreamException Always, with the formatted message
     */
    protected void reportNwfName(String msg, Object arg)
        throws XMLStreamException
    {
        throwOutputError(msg, arg);
    }

    /**
     * Reports a content well-formedness problem.
     *
     * @throws XMLStreamException Always, with the given message
     */
    protected void reportNwfContent(String msg)
        throws XMLStreamException
    {
        throwOutputError(msg);
    }

    /**
     * Variant of {@link #reportNwfContent(String)} where the message is a
     * {@link MessageFormat} pattern taking two arguments.
     *
     * @throws XMLStreamException Always, with the formatted message
     */
    protected void reportNwfContent(String format, Object arg1, Object arg2)
        throws XMLStreamException
    {
        String msg = MessageFormat.format(format, new Object[] { arg1, arg2 });
        reportNwfContent(msg);
    }

    /**
     * Reports a character that could not be escaped in the given type of
     * content. Characters that are invalid in themselves are reported via
     * {@link #reportInvalidChar}; others get the generic "no escaping"
     * message.
     *
     * @param type Description of the content type, used in the message
     * @param ch Character code that could not be output
     *
     * @throws XMLStreamException Always
     */
    protected void reportFailedEscaping(String type, int ch)
        throws XMLStreamException
    {
        // Quick separation of high-range invalid chars:
        if (ch == 0xFFFE || ch == 0xFFFF
            || (ch >= SURR1_FIRST && ch <= SURR2_LAST)) {
            reportInvalidChar(ch); // always throws
        }
        // One more check: is it only escapable in xml 1.1?
        if (ch < 0x0020) {
            // NOTE(review): this consults _config.isXml11(), whereas
            // reportInvalidChar() uses the _xml11 field set by enableXml11();
            // confirm the two are kept in sync.
            if (ch == 0 || !_config.isXml11()) {
                reportInvalidChar(ch); // always throws
            }
        }

        // Integer.valueOf instead of the deprecated Integer(int) constructor
        String msg = MessageFormat.format(ErrorConsts.WERR_NO_ESCAPING,
                new Object[] { type, Integer.valueOf(ch) });
        reportNwfContent(msg);
    }

    /**
     * Reports an attempt to use an empty String as a name.
     *
     * @throws XMLStreamException Always
     */
    protected void reportInvalidEmptyName()
        throws XMLStreamException
    {
        reportNwfContent("Empty String is not a valid name (local name, prefix or processing instruction target)");
    }

    /**
     * Reports a character that can not be output, choosing the most
     * specific message that applies. Each <code>reportNwfContent</code>
     * call below throws, so at most one message is ever produced.
     *
     * @param c Character code that can not be output
     *
     * @throws XMLStreamException Always
     */
    protected void reportInvalidChar(int c)
        throws XMLStreamException
    {
        // First, let's flush any output we may have, to make debugging easier
        try {
            flush();
        } catch (IOException ioe) {
            throw new IoStreamException(ioe);
        }

        if (c == 0) {
            reportNwfContent("Invalid null character in text to output");
        }
        if (c < ' ' || (c >= 0x7F && c <= 0x9F)) {
            String msg = "Invalid white space character (0x"+Integer.toHexString(c)+") in text to output";
            if (_xml11) {
                msg += " (can only be output using character entity)";
            }
            reportNwfContent(msg);
        }
        if (c > XmlConsts.MAX_UNICODE_CHAR) {
            reportNwfContent("Illegal unicode character point (0x"+Integer.toHexString(c)+") to output; max is 0x10FFFF as per RFC 3629");
        }
        /* Surrogate pair in non-quotable (not text or attribute value)
         * content, and non-unicode encoding (ISO-8859-x, Ascii)?
         */
        if (c >= SURR1_FIRST && c <= SURR2_LAST) {
            reportNwfContent("Illegal surrogate pair -- can only be output via character entities (for current encoding), which are not allowed in this content");
        }
        // Just something that the encoding can not express natively?
        reportNwfContent("Invalid XML character "+XmlChars.getCharDesc(c)+" in text to output");
    }

    /**
     * Flushes pending output and then throws an exception with the
     * given message.
     *
     * @throws XMLStreamException Always; an {@link IoStreamException}
     *   wrapping the I/O problem if the flush itself fails
     */
    protected void throwOutputError(String msg)
        throws XMLStreamException
    {
        // First, let's flush any output we may have, to make debugging easier
        try {
            flush();
        } catch (IOException ioe) {
            throw new IoStreamException(ioe);
        }

        throw new XMLStreamException(msg);
    }

    /**
     * Variant of {@link #throwOutputError(String)} where the message is a
     * {@link MessageFormat} pattern taking a single argument.
     *
     * @throws XMLStreamException Always, with the formatted message
     */
    protected void throwOutputError(String format, Object arg)
        throws XMLStreamException
    {
        String msg = MessageFormat.format(format, new Object[] { arg });
        throwOutputError(msg);
    }
}