/* * The Apache Software License, Version 1.1 * * * Copyright (c) 1999 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xalan" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, Lotus * Development Corporation., http://www.lotus.com. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.xalan.serialize; import java.util.Stack; import java.io.Writer; import java.io.IOException; import java.util.Hashtable; import java.util.Vector; import java.util.Properties; import java.util.BitSet; import org.xml.sax.*; import org.apache.xml.utils.BoolStack; import org.apache.xml.utils.Trie; import org.apache.xml.utils.FastStringBuffer; import org.apache.xalan.res.XSLMessages; import org.apache.xpath.res.XPATHErrorResources; import org.apache.xml.utils.StringToIntTable; import org.apache.xalan.templates.OutputProperties; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; /** * <meta name="usage" content="general"/> * SerializerToHTML formats SAX-style events into XML. */ public class SerializerToHTML extends SerializerToXML { /** State stack to keep track of if the current element has output * escaping disabled. */ private BoolStack m_isRawStack = new BoolStack(); /** True if the current element is a block element. (seems like * this needs to be a stack. -sb). */ private boolean m_inBlockElem = false; /** * Map that tells which XML characters should have special treatment, and it * provides character to entity name lookup. */ protected static CharInfo m_htmlcharInfo = new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */ static Trie m_elementFlags = new Trie(); static { // HTML 4.0 loose DTD m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY)); m_elementFlags.put("FRAME", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("ISINDEX", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("APPLET", new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE)); m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK)); // HTML 4.0 strict DTD m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("SUP", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put("SUB", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put("SPAN", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put("BDO", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put("BR", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("ADDRESS", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("DIV", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL)); m_elementFlags.put("MAP", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK)); m_elementFlags.put("AREA", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("LINK", new ElemDesc(0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("IMG", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.WHITESPACESENSITIVE)); m_elementFlags.put("OBJECT", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.WHITESPACESENSITIVE)); m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY)); m_elementFlags.put("HR", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET | ElemDesc.EMPTY)); m_elementFlags.put("P", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("H1", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put("H2", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put("H3", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put("H4", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put("H5", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put("H6", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put("PRE", new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK)); m_elementFlags.put("Q", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put("BLOCKQUOTE", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("INS", new ElemDesc(0)); m_elementFlags.put("DEL", new ElemDesc(0)); m_elementFlags.put("DL", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("OL", new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); m_elementFlags.put("UL", new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL)); m_elementFlags.put("INPUT", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY)); m_elementFlags.put("SELECT", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); m_elementFlags.put("OPTGROUP", new ElemDesc(0)); m_elementFlags.put("OPTION", new ElemDesc(0)); m_elementFlags.put("TEXTAREA", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); m_elementFlags.put("FIELDSET", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM)); m_elementFlags.put("LEGEND", new ElemDesc(0)); m_elementFlags.put("BUTTON", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); m_elementFlags.put("TABLE", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("COL", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("TH", new ElemDesc(0)); m_elementFlags.put("TD", new ElemDesc(0)); m_elementFlags.put("HEAD", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM)); m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("BASE", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("META", new ElemDesc(0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("STYLE", new ElemDesc(0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK)); m_elementFlags.put("SCRIPT", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.RAW)); m_elementFlags.put("NOSCRIPT", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK)); // From "John Ky" <hand@syd.speednet.com.au // Transitional Document Type Definition () // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE)); // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE)); // From "John Ky" <hand@syd.speednet.com.au m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE)); ElemDesc elemDesc; elemDesc = (ElemDesc) m_elementFlags.get("BASE"); elemDesc.setAttr("HREF", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("BLOCKQUOTE"); elemDesc.setAttr("CITE", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("Q"); elemDesc.setAttr("CITE", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("INS"); elemDesc.setAttr("CITE", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("DEL"); elemDesc.setAttr("CITE", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("A"); elemDesc.setAttr("HREF", ElemDesc.ATTRURL); elemDesc.setAttr("NAME", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("INPUT"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); elemDesc = (ElemDesc) m_elementFlags.get("SELECT"); elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY); elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP"); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc = (ElemDesc) m_elementFlags.get("OPTION"); elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA"); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); elemDesc = (ElemDesc) m_elementFlags.get("BUTTON"); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("FOR", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("IMG"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("OBJECT"); elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL); elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL); elemDesc.setAttr("DATA", ElemDesc.ATTRURL); elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL); elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("FORM"); elemDesc.setAttr("ACTION", ElemDesc.ATTRURL); elemDesc = (ElemDesc) m_elementFlags.get("HEAD"); elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL); // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM> elemDesc = (ElemDesc) m_elementFlags.get("FRAME"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); } /** * Dummy element for elements not found. */ static private ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK); /** True if URLs should be specially escaped with the %xx form. */ private boolean m_specialEscapeURLs = true; /** True if the META tag should be omitted. */ private boolean m_omitMetaTag = false; /** * Tells if the formatter should use special URL escaping. * * @param bool True if URLs should be specially escaped with the %xx form. */ public void setSpecialEscapeURLs(boolean bool) { m_specialEscapeURLs = bool; } /** * Tells if the formatter should omit the META tag. * * @param bool True if the META tag should be omitted. */ public void setOmitMetaTag(boolean bool) { m_omitMetaTag = bool; } /** * Specifies an output format for this serializer. It the * serializer has already been associated with an output format, * it will switch to the new format. This method should not be * called while the serializer is in the process of serializing * a document. * * @param format The output format to use */ public void setOutputFormat(Properties format) { m_specialEscapeURLs = OutputProperties.getBooleanProperty(OutputProperties.S_USE_URL_ESCAPING, format); m_omitMetaTag = OutputProperties.getBooleanProperty(OutputProperties.S_OMIT_META_TAG, format); super.setOutputFormat(format); } /** * Tells if the formatter should use special URL escaping. * * @return True if URLs should be specially escaped with the %xx form. */ public boolean getSpecialEscapeURLs() { return m_specialEscapeURLs; } /** * Tells if the formatter should omit the META tag. * * @return True if the META tag should be omitted. */ public boolean getOmitMetaTag() { return m_omitMetaTag; } /** * Get a description of the given element. * * @param name non-null name of element, case insensitive. * * @return non-null reference to ElemDesc, which may be m_dummy if no * element description matches the given name. */ ElemDesc getElemDesc(String name) { if (null != name) { Object obj = m_elementFlags.get(name); if (null != obj) return (ElemDesc) obj; } return m_dummy; } /** * Default constructor. */ public SerializerToHTML() { super(); m_charInfo = m_htmlcharInfo; } /** The name of the current element. */ private String m_currentElementName = null; /** * Receive notification of the beginning of a document. * * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * * @throws org.xml.sax.SAXException */ public void startDocument() throws org.xml.sax.SAXException { m_needToOutputDocTypeDecl = true; m_startNewLine = false; m_shouldNotWriteXMLHeader = true; if (true == m_needToOutputDocTypeDecl) { if ((null != m_doctypeSystem) || (null != m_doctypePublic)) { accum("<!DOCTYPE HTML"); if (null != m_doctypePublic) { accum(" PUBLIC \""); accum(m_doctypePublic); accum("\""); } if (null != m_doctypeSystem) { if (null == m_doctypePublic) accum(" SYSTEM \""); else accum(" \""); accum(m_doctypeSystem); accum("\""); } accum(">"); outputLineSep(); } } m_needToOutputDocTypeDecl = false; } /** * Receive notification of the beginning of an element. * * * @param namespaceURI * @param localName * @param name The element type name. * @param atts The attributes attached to the element, if any. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @see #endElement * @see org.xml.sax.AttributeList */ public void startElement( String namespaceURI, String localName, String name, Attributes atts) throws org.xml.sax.SAXException { // System.out.println("SerializerToHTML#startElement("+namespaceURI+", "+localName+", "+name+", ...);"); if (null != namespaceURI && namespaceURI.length() > 0) { super.startElement(namespaceURI, localName, name, atts); return; } boolean savedDoIndent = m_doIndent; boolean noLineBreak; writeParentTagEnd(); pushState( namespaceURI, localName, m_cdataSectionNames, m_cdataSectionStates); // pushState(namespaceURI, localName, m_format.getNonEscapingElements(), // m_disableOutputEscapingStates); ElemDesc elemDesc = getElemDesc(name); // ElemDesc parentElemDesc = getElemDesc(m_currentElementName); boolean isBlockElement = elemDesc.is(ElemDesc.BLOCK); boolean isHeadElement = elemDesc.is(ElemDesc.HEADELEM); // boolean isWhiteSpaceSensitive = elemDesc.is(ElemDesc.WHITESPACESENSITIVE); if (m_ispreserve) m_ispreserve = false; else if (m_doIndent && (null != m_currentElementName) && (!m_inBlockElem || isBlockElement) /* && !isWhiteSpaceSensitive */ ) { m_startNewLine = true; indent(m_currentIndent); } m_inBlockElem = !isBlockElement; m_isRawStack.push(elemDesc.is(ElemDesc.RAW)); m_currentElementName = name; // m_parents.push(m_currentElementName); this.accum('<'); this.accum(name); int nAttrs = atts.getLength(); for (int i = 0; i < nAttrs; i++) { processAttribute(atts.getQName(i), elemDesc, atts.getValue(i)); } // Flag the current element as not yet having any children. openElementForChildren(); m_currentIndent += this.m_indentAmount; m_isprevtext = false; m_doIndent = savedDoIndent; if (isHeadElement) { writeParentTagEnd(); if (!m_omitMetaTag) { if (m_doIndent) indent(m_currentIndent); accum( "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); // String encoding = Encodings.getMimeEncoding(m_encoding).toLowerCase(); String encoding = Encodings.getMimeEncoding(m_encoding); accum(encoding); accum('"'); accum('>'); } } } /** * Receive notification of the end of an element. * * * @param namespaceURI * @param localName * @param name The element type name * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. */ public void endElement(String namespaceURI, String localName, String name) throws org.xml.sax.SAXException { // System.out.println("SerializerToHTML#endElement("+namespaceURI+", "+localName+", "+name+");"); if (null != namespaceURI && namespaceURI.length() > 0) { super.endElement(namespaceURI, localName, name); return; } m_currentIndent -= this.m_indentAmount; boolean hasChildNodes = childNodesWereAdded(); // System.out.println(m_currentElementName); // m_parents.pop(); m_isRawStack.pop(); ElemDesc elemDesc = getElemDesc(name); // ElemDesc parentElemDesc = getElemDesc(m_currentElementName); boolean isBlockElement = elemDesc.is(ElemDesc.BLOCK); boolean shouldIndent = false; if (m_ispreserve) { m_ispreserve = false; } else if (m_doIndent && (!m_inBlockElem || isBlockElement)) { m_startNewLine = true; shouldIndent = true; // indent(m_currentIndent); } m_inBlockElem = !isBlockElement; if (hasChildNodes) { if (shouldIndent) indent(m_currentIndent); this.accum("</"); this.accum(name); this.accum('>'); m_currentElementName = name; } else { if (!elemDesc.is(ElemDesc.EMPTY)) { this.accum('>'); // As per Dave/Paul recommendation 12/06/2000 // if (shouldIndent) // indent(m_currentIndent); this.accum('<'); this.accum('/'); this.accum(name); this.accum('>'); } else { this.accum('>'); } } if (elemDesc.is(ElemDesc.WHITESPACESENSITIVE)) m_ispreserve = true; if (hasChildNodes) { if (!m_preserves.isEmpty()) m_preserves.pop(); } m_isprevtext = false; // m_disableOutputEscapingStates.pop(); m_cdataSectionStates.pop(); } /** * Process an attribute. * @param name The name of the attribute. * @param elemDesc non-null reference to the owning element description. * @param value The value of the attribute. * * @throws org.xml.sax.SAXException */ protected void processAttribute( String name, ElemDesc elemDesc, String value) throws org.xml.sax.SAXException { this.accum(' '); if (((value.length() == 0) || value.equalsIgnoreCase(name)) && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) { this.accum(name); } else { this.accum(name); this.accum('='); this.accum('\"'); if (elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) writeAttrURI(value, m_specialEscapeURLs); else writeAttrString(value, this.m_encoding); this.accum('\"'); } } /** * Tell if a character is an ASCII digit. */ private boolean isASCIIDigit(char c) { return (c >= '0' && c <= '9'); } /** * Make an integer into an HH hex value. * Does no checking on the size of the input, since this * is only meant to be used locally by writeAttrURI. * * @param i must be a value less than 255. * * @return should be a two character string. */ private String makeHHString(int i) { String s = Integer.toHexString(i).toUpperCase(); if(s.length() == 1) { s = "0"+s; } return s; } /** * Dmitri Ilyin: Makes sure if the String is HH encoded sign. * @param str must be 2 characters long * * @return true or false */ private boolean isHHSign(String str) { boolean sign = true; try { char r = (char)Integer.parseInt(str,16); } catch (NumberFormatException e) { sign = false; } return sign; } /** * Write the specified <var>string</var> after substituting non ASCII characters, * with <CODE>%HH</CODE>, where HH is the hex of the byte value. * * @param string String to convert to XML format. * @param doURLEscaping True if we should try to encode as * per http://www.ietf.org/rfc/rfc2396.txt. * * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. */ public void writeAttrURI(String string, boolean doURLEscaping) throws org.xml.sax.SAXException { // http://www.ietf.org/rfc/rfc2396.txt says: // A URI is always in an "escaped" form, since escaping or unescaping a // completed URI might change its semantics. Normally, the only time // escape encodings can safely be made is when the URI is being created // from its component parts; each component may have its own set of // characters that are reserved, so only the mechanism responsible for // generating or interpreting that component can determine whether or // not escaping a character will change its semantics. Likewise, a URI // must be separated into its components before the escaped characters // within those components can be safely decoded. // // ...So we do our best to do limited escaping of the URL, without // causing damage. If the URL is already properly escaped, in theory, this // function should not change the string value. char[] stringArray = string.toCharArray(); int len = stringArray.length; for (int i = 0; i < len; i++) { char ch = stringArray[i]; if ((ch < 33) || (ch > 126)) { if (doURLEscaping) { // Encode UTF16 to UTF8. // Reference is Unicode, A Primer, by Tony Graham. // Page 92. // Note that Kay doesn't escape 0x20... // if(ch == 0x20) // Not sure about this... -sb // { // accum(ch); // } // else if(ch <= 0x7F) { accum('%'); accum(makeHHString(ch)); } else if(ch <= 0x7FF) { // Clear low 6 bits before rotate, put high 4 bits in low byte, // and set two high bits. int high = (ch >> 6) | 0xC0; int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit accum('%'); accum(makeHHString(high)); accum('%'); accum(makeHHString(low)); } else if( isUTF16Surrogate(ch) ) // high surrogate { // I'm sure this can be done in 3 instructions, but I choose // to try and do it exactly like it is done in the book, at least // until we are sure this is totally clean. I don't think performance // is a big issue with this particular function, though I could be // wrong. Also, the stuff below clearly does more masking than // it needs to do. // Clear high 6 bits. int highSurrogate = ((int) ch) & 0x03FF; // Middle 4 bits (wwww) + 1 // "Note that the value of wwww from the high surrogate bit pattern // is incremented to make the uuuuu bit pattern in the scalar value // so the surrogate pair don't address the BMP." int wwww = ((highSurrogate & 0x03C0) >> 6); int uuuuu = wwww+1; // next 4 bits int zzzz = (highSurrogate & 0x003C) >> 2; // low 2 bits int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; // Get low surrogate character. ch = stringArray[++i]; // Clear high 6 bits. int lowSurrogate = ((int) ch) & 0x03FF; // put the middle 4 bits into the bottom of yyyyyy (byte 3) yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); // bottom 6 bits. int xxxxxx = (lowSurrogate & 0x003F); int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu int byte2 = 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; int byte3 = 0x80 | yyyyyy; int byte4 = 0x80 | xxxxxx; accum('%'); accum(makeHHString(byte1)); accum('%'); accum(makeHHString(byte2)); accum('%'); accum(makeHHString(byte3)); accum('%'); accum(makeHHString(byte4)); } else { int high = (ch >> 12) | 0xE0; // top 4 bits int middle = ((ch & 0x0FC0) >> 6) | 0x80; // middle 6 bits int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit accum('%'); accum(makeHHString(high)); accum('%'); accum(makeHHString(middle)); accum('%'); accum(makeHHString(low)); } } else if (ch < m_maxCharacter) { accum(ch); } else { accum("&#"); accum(Integer.toString(ch)); accum(';'); } } else if('%' == ch) { // If the character is a '%' number number, try to avoid double-escaping. // There is a question if this is legal behavior. // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) if ( ((i+2) < len) && isHHSign(new String(stringArray,i+1,2)) ) { accum(ch); } else { if (doURLEscaping) { accum('%'); accum(makeHHString(ch)); } else accum(ch); } } // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as // not allowing quotes in the URI proper syntax, nor in the fragment // identifier, we believe that it's OK to double escape quotes. else if (ch == '"') { // Mike Kay encodes this as ", so he may know something I don't? if (doURLEscaping) accum("%22"); else accum("""); // we have to escape this, I guess. } else { accum(ch); } } } /** * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. * * @param string String to convert to XML format. * @param encoding CURRENTLY NOT IMPLEMENTED. * * @throws org.xml.sax.SAXException */ public void writeAttrString(String string, String encoding) throws org.xml.sax.SAXException { final char chars[] = string.toCharArray(); final int strLen = chars.length; for (int i = 0; i < strLen; i++) { char ch = chars[i]; // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); // System.out.println("ch: "+(int)ch); // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); if ((ch < m_maxCharacter) && (!m_charInfo.isSpecial(ch))) { accum(ch); } else if ('<' == ch || '>' == ch) { accum(ch); // no escaping in this case, as specified in 15.2 } else if (('&' == ch) && ((i + 1) < strLen) && ('{' == chars[i + 1])) { accum(ch); // no escaping in this case, as specified in 15.2 } else { int pos = accumDefaultEntity(ch, i, chars, strLen, false); if (i != pos) { i = pos - 1; } else { if (isUTF16Surrogate(ch)) { try { i = writeUTF16Surrogate(ch, chars, i, strLen); } catch(IOException ioe) { throw new SAXException(ioe); } } // The next is kind of a hack to keep from escaping in the case // of Shift_JIS and the like. /* else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) && (ch != 160)) { accum(ch); // no escaping in this case } else */ String entityName = m_charInfo.getEntityNameForChar(ch); if (null != entityName) { accum('&'); accum(entityName); accum(';'); } else if (ch < m_maxCharacter) { accum(ch); // no escaping in this case } else { if (ch < m_maxCharacter) { accum(ch); // no escaping in this case } else { accum("&#"); accum(Integer.toString(ch)); accum(';'); } } } } } } /** * Copy an entity into the accumulation buffer. * * @param s The name of the entity. * @param pos unused. * * @return The pos argument. * * @throws org.xml.sax.SAXException */ private int copyEntityIntoBuf(String s, int pos) throws org.xml.sax.SAXException { int l = s.length(); accum('&'); for (int i = 0; i < l; i++) { accum(s.charAt(i)); } accum(';'); return pos; } /** * Receive notification of character data. * * <p>The Parser will call this method to report each chunk of * character data. SAX parsers may return all contiguous character * data in a single chunk, or they may split it into several * chunks; however, all of the characters in any single event * must come from the same external entity, so that the Locator * provides useful information.</p> * * <p>The application must not attempt to read from the array * outside of the specified range.</p> * * <p>Note that some parsers will report whitespace using the * ignorableWhitespace() method rather than this one (validating * parsers must do so).</p> * * @param chars The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @see #ignorableWhitespace * @see org.xml.sax.Locator * * @throws org.xml.sax.SAXException */ public void characters(char chars[], int start, int length) throws org.xml.sax.SAXException { if (m_isRawStack.peekOrFalse()) { try { writeParentTagEnd(); m_ispreserve = true; if (shouldIndent()) indent(m_currentIndent); // this.accum("<![CDATA["); // this.accum(chars, start, length); writeNormalizedChars(chars, start, length, false); // this.accum("]]>"); return; } catch (IOException ioe) { throw new org.xml.sax.SAXException( XSLMessages.createXPATHMessage( XPATHErrorResources.ER_OIERROR, null), ioe); //"IO error", ioe); } } else { super.characters(chars, start, length); } } /** * Receive notification of cdata. * * <p>The Parser will call this method to report each chunk of * character data. SAX parsers may return all contiguous character * data in a single chunk, or they may split it into several * chunks; however, all of the characters in any single event * must come from the same external entity, so that the Locator * provides useful information.</p> * * <p>The application must not attempt to read from the array * outside of the specified range.</p> * * <p>Note that some parsers will report whitespace using the * ignorableWhitespace() method rather than this one (validating * parsers must do so).</p> * * @param ch The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @see #ignorableWhitespace * @see org.xml.sax.Locator * * @throws org.xml.sax.SAXException */ public void cdata(char ch[], int start, int length) throws org.xml.sax.SAXException { if ((null != m_currentElementName) && (m_currentElementName.equalsIgnoreCase("SCRIPT") || m_currentElementName.equalsIgnoreCase("STYLE"))) { try { writeParentTagEnd(); m_ispreserve = true; if (shouldIndent()) indent(m_currentIndent); // this.accum(ch, start, length); writeNormalizedChars(ch, start, length, true); } catch (IOException ioe) { throw new org.xml.sax.SAXException( XSLMessages.createXPATHMessage( XPATHErrorResources.ER_OIERROR, null), ioe); //"IO error", ioe); } } /* else if(m_stripCData) // should normally always be false { try { writeParentTagEnd(); m_ispreserve = true; if (shouldIndent()) indent(m_currentIndent); // this.accum("<![CDATA["); this.accum(ch, start, length); // this.accum("]]>"); } catch(IOException ioe) { throw new org.xml.sax.SAXException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_OIERROR, null),ioe); //"IO error", ioe); } } */ else { super.cdata(ch, start, length); } } /** * Receive notification of a processing instruction. * * @param target The processing instruction target. * @param data The processing instruction data, or null if * none was supplied. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * * @throws org.xml.sax.SAXException */ public void processingInstruction(String target, String data) throws org.xml.sax.SAXException { // Use a fairly nasty hack to tell if the next node is supposed to be // unescaped text. if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) { startNonEscaping(); } else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) { endNonEscaping(); } else { writeParentTagEnd(); if (shouldIndent()) indent(m_currentIndent); this.accum("<?" + target); if (data.length() > 0 &&!Character.isSpaceChar(data.charAt(0))) this.accum(" "); this.accum(data + ">"); // different from XML // Always output a newline char if not inside of an // element. The whitespace is not significant in that // case. if (m_elemStack.isEmpty()) outputLineSep(); m_startNewLine = true; } } /** * Receive notivication of a entityReference. * * @param name non-null reference to entity name string. * * @throws org.xml.sax.SAXException */ public void entityReference(String name) throws org.xml.sax.SAXException { this.accum("&"); this.accum(name); this.accum(";"); } }