// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. // // TagSoup is licensed under the Apache License, // Version 2.0. You may obtain a copy of this license at // http://www.apache.org/licenses/LICENSE-2.0 . You may also have // additional legal rights not granted by this license. // // TagSoup is distributed in the hope that it will be useful, but // unless required by applicable law or agreed to in writing, TagSoup // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS // OF ANY KIND, either express or implied; not even the implied warranty // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // package com.onegravity.rteditor.converter.tagsoup; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.io.Writer; import java.util.Enumeration; import java.util.HashMap; import java.util.Hashtable; import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.Stack; import java.util.regex.Matcher; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.helpers.NamespaceSupport; import org.xml.sax.helpers.XMLFilterImpl; import android.text.util.Linkify.MatchFilter; import com.onegravity.rteditor.converter.tagsoup.util.StringEscapeUtils; /** * Filter to write an XML document from a SAX event stream. * <p> * <p> * This class can be used by itself or as part of a SAX event stream: it takes * as input a series of SAX2 ContentHandler events and uses the information in * those events to write an XML document. Since this class is a filter, it can * also pass the events on down a filter chain for further processing (you can * use the XMLWriter to take a snapshot of the current state at any point in a * filter chain), and it can be used directly as a ContentHandler for a SAX2 * XMLReader. * </p> * <p> * <p> * The client creates a document by invoking the methods for standard SAX2 * events, always beginning with the {@link #startDocument startDocument} method * and ending with the {@link #endDocument endDocument} method. There are * convenience methods provided so that clients to not have to create empty * attribute lists or provide empty strings as parameters; for example, the * method invocation * </p> * <p> * <pre> * w.startElement("foo"); * </pre> * <p> * <p> * is equivalent to the regular SAX2 ContentHandler method * </p> * <p> * <pre> * w.startElement("", "foo", "", new AttributesImpl()); * </pre> * <p> * <p> * Except that it is more efficient because it does not allocate a new empty * attribute list each time. The following code will send a simple XML document * to standard output: * </p> * <p> * <pre> * XMLWriter w = new XMLWriter(); * * w.startDocument(); * w.startElement("greeting"); * w.characters("Hello, world!"); * w.endElement("greeting"); * w.endDocument(); * </pre> * <p> * <p> * The resulting document will look like this: * </p> * <p> * <pre> * <?xml version="1.0" standalone="yes"?> * * <greeting>Hello, world!</greeting> * </pre> * <p> * <p> * In fact, there is an even simpler convenience method, <var>dataElement</var>, * designed for writing elements that contain only character data, so the code * to generate the document could be shortened to * </p> * <p> * <pre> * XMLWriter w = new XMLWriter(); * * w.startDocument(); * w.dataElement("greeting", "Hello, world!"); * w.endDocument(); * </pre> * <p> * <h2>Whitespace</h2> * <p> * <p> * According to the XML Recommendation, <em>all</em> whitespace in an XML * document is potentially significant to an application, so this class never * adds newlines or indentation. If you insert three elements in a row, as in * </p> * <p> * <pre> * w.dataElement("item", "1"); * w.dataElement("item", "2"); * w.dataElement("item", "3"); * </pre> * <p> * <p> * you will end up with * </p> * <p> * <pre> * <item>1</item><item>3</item><item>3</item> * </pre> * <p> * <p> * You need to invoke one of the <var>characters</var> methods explicitly to add * newlines or indentation. Alternatively, you can use * {@link com.megginson.sax.DataWriter DataWriter}, which is derived from this * class -- it is optimized for writing purely data-oriented (or field-oriented) * XML, and does automatic linebreaks and indentation (but does not support * mixed content properly). * </p> * <p> * <p> * <h2>Namespace Support</h2> * <p> * <p> * The writer contains extensive support for XML Namespaces, so that a client * application does not have to keep track of prefixes and supply * <var>xmlns</var> attributes. By default, the XML writer will generate * Namespace declarations in the form _NS1, _NS2, etc., wherever they are * needed, as in the following example: * </p> * <p> * <pre> * w.startDocument(); * w.emptyElement("http://www.foo.com/ns/", "foo"); * w.endDocument(); * </pre> * <p> * <p> * The resulting document will look like this: * </p> * <p> * <pre> * <?xml version="1.0" standalone="yes"?> * * <_NS1:foo xmlns:_NS1="http://www.foo.com/ns/"/> * </pre> * <p> * <p> * In many cases, document authors will prefer to choose their own prefixes * rather than using the (ugly) default names. The XML writer allows two methods * for selecting prefixes: * </p> * <p> * <ol> * <li>the qualified name</li> * <li>the {@link #setPrefix setPrefix} method.</li> * </ol> * <p> * <p> * Whenever the XML writer finds a new Namespace URI, it checks to see if a * qualified (prefixed) name is also available; if so it attempts to use the * name's prefix (as long as the prefix is not already in use for another * Namespace URI). * </p> * <p> * <p> * Before writing a document, the client can also pre-map a prefix to a * Namespace URI with the setPrefix method: * </p> * <p> * <pre> * w.setPrefix("http://www.foo.com/ns/", "foo"); * w.startDocument(); * w.emptyElement("http://www.foo.com/ns/", "foo"); * w.endDocument(); * </pre> * <p> * <p> * The resulting document will look like this: * </p> * <p> * <pre> * <?xml version="1.0" standalone="yes"?> * * <foo:foo xmlns:foo="http://www.foo.com/ns/"/> * </pre> * <p> * <p> * The default Namespace simply uses an empty string as the prefix: * </p> * <p> * <pre> * w.setPrefix("http://www.foo.com/ns/", ""); * w.startDocument(); * w.emptyElement("http://www.foo.com/ns/", "foo"); * w.endDocument(); * </pre> * <p> * <p> * The resulting document will look like this: * </p> * <p> * <pre> * <?xml version="1.0" standalone="yes"?> * * <foo xmlns="http://www.foo.com/ns/"/> * </pre> * <p> * <p> * By default, the XML writer will not declare a Namespace until it is actually * used. Sometimes, this approach will create a large number of Namespace * declarations, as in the following example: * </p> * <p> * <pre> * <xml version="1.0" standalone="yes"?> * * <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> * <rdf:Description about="http://www.foo.com/ids/books/12345"> * <dc:title xmlns:dc="http://www.purl.org/dc/">A Dark Night</dc:title> * <dc:creator xmlns:dc="http://www.purl.org/dc/">Jane Smith</dc:title> * <dc:date xmlns:dc="http://www.purl.org/dc/">2000-09-09</dc:title> * </rdf:Description> * </rdf:RDF> * </pre> * <p> * <p> * The "rdf" prefix is declared only once, because the RDF Namespace is used by * the root element and can be inherited by all of its descendants; the "dc" * prefix, on the other hand, is declared three times, because no higher element * uses the Namespace. To solve this problem, you can instruct the XML writer to * predeclare Namespaces on the root element even if they are not used there: * </p> * <p> * <pre> * w.forceNSDecl("http://www.purl.org/dc/"); * </pre> * <p> * <p> * Now, the "dc" prefix will be declared on the root element even though it's * not needed there, and can be inherited by its descendants: * </p> * <p> * <pre> * <xml version="1.0" standalone="yes"?> * * <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" * xmlns:dc="http://www.purl.org/dc/"> * <rdf:Description about="http://www.foo.com/ids/books/12345"> * <dc:title>A Dark Night</dc:title> * <dc:creator>Jane Smith</dc:title> * <dc:date>2000-09-09</dc:title> * </rdf:Description> * </rdf:RDF> * </pre> * <p> * <p> * This approach is also useful for declaring Namespace prefixes that be used by * qualified names appearing in attribute values or character data. * </p> * * @author David Megginson, david@megginson.com * @version 0.2 * @see org.xml.sax.XMLFilter * @see org.xml.sax.ContentHandler */ public class HTMLWriter extends XMLFilterImpl implements LexicalHandler { // ////////////////////////////////////////////////////////////////// // Tags to ignore // ////////////////////////////////////////////////////////////////// private static Map<String, Map<String, String>> mTags2Ignore = new HashMap<String, Map<String, String>>(); static { // meta refresh tag + iframe meta refresh HashMap<String, String> attributes = new HashMap<String, String>(); attributes.put("http-equiv", "Refresh"); mTags2Ignore.put("meta", attributes); mTags2Ignore.put("iframe", attributes); // video, audio tags with autoplay attributes = new HashMap<String, String>(); attributes.put("autoplay", "autoplay#true"); mTags2Ignore.put("audio", attributes); mTags2Ignore.put("video", attributes); } // ////////////////////////////////////////////////////////////////// // Constants. // ////////////////////////////////////////////////////////////////// public static final String CDATA_SECTION_ELEMENTS = "cdata-section-elements"; public static final String DOCTYPE_PUBLIC = "doctype-public"; public static final String DOCTYPE_SYSTEM = "doctype-system"; public static final String ENCODING = "encoding"; public static final String INDENT = "indent"; // currently ignored public static final String MEDIA_TYPE = "media-type"; // currently ignored public static final String METHOD = "method"; // currently html or xml public static final String OMIT_XML_DECLARATION = "omit-xml-declaration"; public static final String STANDALONE = "standalone"; // currently ignored public static final String VERSION = "version"; // ////////////////////////////////////////////////////////////////// // Internal state. // ////////////////////////////////////////////////////////////////// private Hashtable<String, String> prefixTable; private Hashtable<String, Boolean> forcedDeclTable; private Hashtable<String, String> doneDeclTable; private int elementLevel = 0; private Writer output; private NamespaceSupport nsSupport; private int prefixCounter = 0; private Properties outputProperties; private String outputEncoding = ""; private boolean htmlMode = false; private boolean forceDTD = false; private boolean hasOutputDTD = false; private String overridePublic = null; private String overrideSystem = null; private String version = null; private String standalone = null; private boolean cdataElement = false; private boolean mOmitXHTMLNamespace; private Stack<String> mIgnoredTags; // ////////////////////////////////////////////////////////////////// // Constructors. // ////////////////////////////////////////////////////////////////// /** * Create a new XML writer. * <p> * <p> * Write to standard output. * </p> */ public HTMLWriter(boolean omitXHTMLNamespace) { nsSupport = new NamespaceSupport(); prefixTable = new Hashtable<String, String>(); forcedDeclTable = new Hashtable<String, Boolean>(); doneDeclTable = new Hashtable<String, String>(); outputProperties = new Properties(); // we always generate HTML code... setOutputProperty(METHOD, "html"); setOutputProperty(OMIT_XML_DECLARATION, "yes"); mOmitXHTMLNamespace = omitXHTMLNamespace; mIgnoredTags = new Stack<String>(); } // ////////////////////////////////////////////////////////////////// // Public methods. // ////////////////////////////////////////////////////////////////// /** * Reset the writer. * <p> * <p> * This method is especially useful if the writer throws an exception before * it is finished, and you want to reuse the writer for a new document. It * is usually a good idea to invoke {@link #flush flush} before resetting * the writer, to make sure that no output is lost. * </p> * <p> * <p> * This method is invoked automatically by the {@link #startDocument * startDocument} method before writing a new document. * </p> * <p> * <p> * <strong>Note:</strong> this method will <em>not</em> clear the prefix or * URI information in the writer or the selected output writer. * </p> * * @throws SAXException * @see #flush */ public void reset() throws SAXException { writeText4Links(); elementLevel = 0; prefixCounter = 0; nsSupport.reset(); } /** * Flush the output. * <p> * <p> * This method flushes the output stream. It is especially useful when you * need to make certain that the entire document has been written to output * but do not want to close the output stream. * </p> * <p> * <p> * This method is invoked automatically by the {@link #endDocument * endDocument} method after writing a document. * </p> * * @see #reset */ public void flush() throws IOException, SAXException { writeText4Links(); output.flush(); } /** * Set a new output destination for the document. * * @param writer The output destination, or null to use standard output. * @return The current output writer. * @see #flush */ public void setOutput(Writer writer) { if (writer == null) { output = new OutputStreamWriter(System.out); } else { output = writer; } } /** * Specify a preferred prefix for a Namespace URI. * <p> * <p> * Note that this method does not actually force the Namespace to be * declared; to do that, use the {@link #forceNSDecl(java.lang.String) * forceNSDecl} method as well. * </p> * * @param uri The Namespace URI. * @param prefix The preferred prefix, or "" to select the default Namespace. * @see #getPrefix * @see #forceNSDecl(java.lang.String) * @see #forceNSDecl(java.lang.String, java.lang.String) */ public void setPrefix(String uri, String prefix) { prefixTable.put(uri, prefix); } /** * Get the current or preferred prefix for a Namespace URI. * * @param uri The Namespace URI. * @return The preferred prefix, or "" for the default Namespace. * @see #setPrefix */ public String getPrefix(String uri) { return (String) prefixTable.get(uri); } /** * Force a Namespace to be declared on the root element. * <p> * <p> * By default, the XMLWriter will declare only the Namespaces needed for an * element; as a result, a Namespace may be declared many places in a * document if it is not used on the root element. * </p> * <p> * <p> * This method forces a Namespace to be declared on the root element even if * it is not used there, and reduces the number of xmlns attributes in the * document. * </p> * * @param uri The Namespace URI to declare. * @see #forceNSDecl(java.lang.String, java.lang.String) * @see #setPrefix */ public void forceNSDecl(String uri) { forcedDeclTable.put(uri, Boolean.TRUE); } /** * Force a Namespace declaration with a preferred prefix. * <p> * <p> * This is a convenience method that invokes {@link #setPrefix setPrefix} * then {@link #forceNSDecl(java.lang.String) forceNSDecl}. * </p> * * @param uri The Namespace URI to declare on the root element. * @param prefix The preferred prefix for the Namespace, or "" for the default * Namespace. * @see #setPrefix * @see #forceNSDecl(java.lang.String) */ public void forceNSDecl(String uri, String prefix) { setPrefix(uri, prefix); forceNSDecl(uri); } // ////////////////////////////////////////////////////////////////// // Methods from org.xml.sax.ContentHandler. // ////////////////////////////////////////////////////////////////// /** * Write the XML declaration at the beginning of the document. * <p> * Pass the event on down the filter chain for further processing. * * @throws org.xml.sax.SAXException If there is an error writing the XML declaration, or if a * handler further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#startDocument */ public void startDocument() throws SAXException { writeText4Links(); reset(); if (!("yes".equals(outputProperties.getProperty(OMIT_XML_DECLARATION, "no")))) { write("<?xml"); if (version == null) { write(" version=\"1.0\""); } else { write(" version=\""); write(version); write("\""); } if (outputEncoding != null && outputEncoding != "") { write(" encoding=\""); write(outputEncoding); write("\""); } if (standalone == null) { write(" standalone=\"yes\"?>\n"); } else { write(" standalone=\""); write(standalone); write("\""); } } super.startDocument(); } /** * Write a newline at the end of the document. * <p> * Pass the event on down the filter chain for further processing. * * @throws org.xml.sax.SAXException If there is an error writing the newline, or if a handler * further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#endDocument */ public void endDocument() throws SAXException { writeText4Links(); write('\n'); super.endDocument(); try { flush(); } catch (IOException e) { throw new SAXException(e); } } /** * Write a start tag. * <p> * Pass the event on down the filter chain for further processing. * * @param uri The Namespace URI, or the empty string if none is available. * @param localName The element's local (unprefixed) name (required). * @param qName The element's qualified (prefixed) name, or the empty string * is none is available. This method will use the qName as a * template for generating a prefix if necessary, but it is not * guaranteed to use the same qName. * @param atts The element's attribute list (must not be null). * @throws org.xml.sax.SAXException If there is an error writing the start tag, or if a * handler further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#startElement */ @Override public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { writeText4Links(); if (!ignoreElement(uri, localName, qName, atts)) { elementLevel++; nsSupport.pushContext(); if (forceDTD && !hasOutputDTD) { startDTD(localName == null ? qName : localName, "", ""); } write('<'); writeName(uri, localName, qName, true); writeAttributes(atts); if (elementLevel == 1) { forceNSDecls(); } if (!mOmitXHTMLNamespace || !"html".equalsIgnoreCase(localName)) { writeNSDecls(); } write('>'); if (htmlMode && (qName.equals("script") || qName.equals("style"))) { cdataElement = true; } if (htmlMode && localName.equals("a")) { mIgnoreChars = true; } super.startElement(uri, localName, qName, atts); } } private boolean ignoreElement(String uri, String localName, String qName, Attributes atts) { Map<String, String> tagAttrs = mTags2Ignore.get(qName.toLowerCase(Locale.US)); if (tagAttrs != null) { for (String attrKey : tagAttrs.keySet()) { for (String attrValue : tagAttrs.get(attrKey).split("#")) { String value = atts.getValue(attrKey); if (!isNullOrEmpty(value) && attrValue.equalsIgnoreCase(value)) { mIgnoredTags.push(qName); return true; } } } } return false; } private boolean isNullOrEmpty(String string) { return string == null || string.length() == 0; } /** * Write an end tag. * <p> * Pass the event on down the filter chain for further processing. * * @param uri The Namespace URI, or the empty string if none is available. * @param localName The element's local (unprefixed) name (required). * @param qName The element's qualified (prefixed) name, or the empty string * is none is available. This method will use the qName as a * template for generating a prefix if necessary, but it is not * guaranteed to use the same qName. * @throws org.xml.sax.SAXException If there is an error writing the end tag, or if a handler * further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#endElement */ @Override public void endElement(String uri, String localName, String qName) throws SAXException { writeText4Links(); if (!mIgnoredTags.isEmpty() && mIgnoredTags.peek().equalsIgnoreCase(qName)) { mIgnoredTags.pop(); } else { if (!(htmlMode && (uri.equals("http://www.w3.org/1999/xhtml") || uri.equals("")) && (qName.equals("area") || qName.equals("base") || qName.equals("basefont") || qName.equals("br") || qName.equals("col") || qName.equals("frame") || qName.equals("hr") || qName.equals("img") || qName.equals("input") || qName.equals("isindex") || qName.equals("link") || qName.equals("meta") || qName.equals("param")))) { write("</"); writeName(uri, localName, qName, true); write('>'); } if (elementLevel == 1) { write('\n'); } if (htmlMode && localName.equals("a")) { mIgnoreChars = false; } cdataElement = false; super.endElement(uri, localName, qName); nsSupport.popContext(); elementLevel--; } } /** * Write character data. * <p> * Pass the event on down the filter chain for further processing. * * @param ch The array of characters to write. * @param start The starting position in the array. * @param length The number of characters to write. * @throws org.xml.sax.SAXException If there is an error writing the characters, or if a * handler further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#characters */ @Override public void characters(char ch[], int start, int len) throws SAXException { if (!cdataElement) { if (mIgnoreChars) { writeText4Links(); writeEscUTF16(new String(ch), start, len, false); } else { collectText4Links(ch, start, len); } } else { writeText4Links(); for (int i = start; i < start + len; i++) { write(ch[i]); } } super.characters(ch, start, len); } /** * Write ignorable whitespace. * <p> * Pass the event on down the filter chain for further processing. * * @param ch The array of characters to write. * @param start The starting position in the array. * @param length The number of characters to write. * @throws org.xml.sax.SAXException If there is an error writing the whitespace, or if a * handler further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#ignorableWhitespace */ @Override public void ignorableWhitespace(char ch[], int start, int length) throws SAXException { writeText4Links(); writeEscUTF16(new String(ch), start, length, false); super.ignorableWhitespace(ch, start, length); } /** * Write a processing instruction. * <p> * Pass the event on down the filter chain for further processing. * * @param target The PI target. * @param data The PI data. * @throws org.xml.sax.SAXException If there is an error writing the PI, or if a handler * further down the filter chain raises an exception. * @see org.xml.sax.ContentHandler#processingInstruction */ @Override public void processingInstruction(String target, String data) throws SAXException { writeText4Links(); write("<?"); write(target); write(' '); write(data); write("?>"); if (elementLevel < 1) { write('\n'); } super.processingInstruction(target, data); } // ////////////////////////////////////////////////////////////////// // Internal methods. // ////////////////////////////////////////////////////////////////// /** * Force all Namespaces to be declared. * <p> * This method is used on the root element to ensure that the predeclared * Namespaces all appear. */ private void forceNSDecls() { Enumeration<String> prefixes = forcedDeclTable.keys(); while (prefixes.hasMoreElements()) { String prefix = (String) prefixes.nextElement(); doPrefix(prefix, null, true); } } /** * Determine the prefix for an element or attribute name. * <p> * * @param uri The Namespace URI. * @param qName The qualified name (optional); this will be used to indicate * the preferred prefix if none is currently bound. * @param isElement true if this is an element name, false if it is an attribute * name (which cannot use the default Namespace). */ private String doPrefix(String uri, String qName, boolean isElement) { String defaultNS = nsSupport.getURI(""); if ("".equals(uri)) { if (isElement && defaultNS != null) nsSupport.declarePrefix("", ""); return null; } String prefix; if (isElement && defaultNS != null && uri.equals(defaultNS)) { prefix = ""; } else { prefix = nsSupport.getPrefix(uri); } if (prefix != null) { return prefix; } prefix = (String) doneDeclTable.get(uri); if (prefix != null && ((!isElement || defaultNS != null) && "".equals(prefix) || nsSupport .getURI(prefix) != null)) { prefix = null; } if (prefix == null) { prefix = (String) prefixTable.get(uri); if (prefix != null && ((!isElement || defaultNS != null) && "".equals(prefix) || nsSupport .getURI(prefix) != null)) { prefix = null; } } if (prefix == null && qName != null && !"".equals(qName)) { int i = qName.indexOf(':'); if (i == -1) { if (isElement && defaultNS == null) { prefix = ""; } } else { prefix = qName.substring(0, i); } } for (; prefix == null || nsSupport.getURI(prefix) != null; prefix = "__NS" + ++prefixCounter) ; nsSupport.declarePrefix(prefix, uri); doneDeclTable.put(uri, prefix); return prefix; } /** * Write a raw character. * * @param c The character to write. * @throws org.xml.sax.SAXException If there is an error writing the character, this method * will throw an IOException wrapped in a SAXException. */ private void write(char c) throws SAXException { try { output.write(c); } catch (IOException e) { throw new SAXException(e); } } /** * Write a raw string. * * @param s * @throws org.xml.sax.SAXException If there is an error writing the string, this method will * throw an IOException wrapped in a SAXException */ private void write(String s) throws SAXException { try { output.write(s); } catch (IOException e) { throw new SAXException(e); } } /** * Write out an attribute list, escaping values. * <p> * The names will have prefixes added to them. * * @param atts The attribute list to write. * @throws org.xml.SAXException If there is an error writing the attribute list, this * method will throw an IOException wrapped in a * SAXException. */ private void writeAttributes(Attributes atts) throws SAXException { int len = atts.getLength(); for (int i = 0; i < len; i++) { write(' '); writeName(atts.getURI(i), atts.getLocalName(i), atts.getQName(i), false); if (htmlMode && booleanAttribute(atts.getLocalName(i), atts.getQName(i), atts.getValue(i))) break; write("=\""); String s = atts.getValue(i); writeEscUTF16(s, 0, s.length(), true); write('"'); } } private String[] booleans = {"checked", "compact", "declare", "defer", "disabled", "ismap", "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", "selected"}; // Return true if the attribute is an HTML boolean from the above list. private boolean booleanAttribute(String localName, String qName, String value) { String name = localName; if (name == null) { int i = qName.indexOf(':'); if (i != -1) name = qName.substring(i + 1, qName.length()); } if (!name.equals(value)) return false; for (int j = 0; j < booleans.length; j++) { if (name.equals(booleans[j])) return true; } return false; } /** * Write an array of data characters with escaping. * * @param ch The array of characters. * @param start The starting position. * @param length The number of characters to use. * @param isAttVal true if this is an attribute value literal. * @throws org.xml.SAXException If there is an error writing the characters, this method * will throw an IOException wrapped in a SAXException. */ private void writeEscUTF16(String s, int start, int length, boolean isAttVal) throws SAXException { String subString = s.substring(start, start + length); write(StringEscapeUtils.escapeHtml4(subString)); } /** * Write out the list of Namespace declarations. * * @throws org.xml.sax.SAXException This method will throw an IOException wrapped in a * SAXException if there is an error writing the Namespace * declarations. */ @SuppressWarnings("unchecked") private void writeNSDecls() throws SAXException { Enumeration<String> prefixes = (Enumeration<String>) nsSupport.getDeclaredPrefixes(); while (prefixes.hasMoreElements()) { String prefix = (String) prefixes.nextElement(); String uri = nsSupport.getURI(prefix); if (uri == null) { uri = ""; } write(' '); if ("".equals(prefix)) { write("xmlns=\""); } else { write("xmlns:"); write(prefix); write("=\""); } writeEscUTF16(uri, 0, uri.length(), true); write('\"'); } } /** * Write an element or attribute name. * * @param uri The Namespace URI. * @param localName The local name. * @param qName The prefixed name, if available, or the empty string. * @param isElement true if this is an element name, false if it is an attribute * name. * @throws org.xml.sax.SAXException This method will throw an IOException wrapped in a * SAXException if there is an error writing the name. */ private void writeName(String uri, String localName, String qName, boolean isElement) throws SAXException { String prefix = doPrefix(uri, qName, isElement); if (prefix != null && !"".equals(prefix)) { write(prefix); write(':'); } if (localName != null && !"".equals(localName)) { write(localName); } else { int i = qName.indexOf(':'); write(qName.substring(i + 1, qName.length())); } } // ////////////////////////////////////////////////////////////////// // Default LexicalHandler implementation // ////////////////////////////////////////////////////////////////// @Override public void comment(char[] ch, int start, int length) throws SAXException { write("<!--"); for (int i = start; i < start + length; i++) { write(ch[i]); if (ch[i] == '-' && i + 1 <= start + length && ch[i + 1] == '-') write(' '); } write("-->"); } @Override public void endCDATA() throws SAXException { } @Override public void endDTD() throws SAXException { } @Override public void endEntity(String name) throws SAXException { } @Override public void startCDATA() throws SAXException { } @Override public void startDTD(String name, String publicid, String systemid) throws SAXException { if (name == null) return; // can't cope if (hasOutputDTD) return; // only one DTD hasOutputDTD = true; write("<!DOCTYPE "); write(name); if (systemid == null) systemid = ""; if (overrideSystem != null) systemid = overrideSystem; char sysquote = (systemid.indexOf('"') != -1) ? '\'' : '"'; if (overridePublic != null) publicid = overridePublic; if (!(publicid == null || "".equals(publicid))) { char pubquote = (publicid.indexOf('"') != -1) ? '\'' : '"'; write(" PUBLIC "); write(pubquote); write(publicid); write(pubquote); write(' '); } else { write(" SYSTEM "); } write(sysquote); write(systemid); write(sysquote); write(">\n"); } @Override public void startEntity(String name) throws SAXException { } // ////////////////////////////////////////////////////////////////// // Output properties // ////////////////////////////////////////////////////////////////// public String getOutputProperty(String key) { return outputProperties.getProperty(key); } public void setOutputProperty(String key, String value) { outputProperties.setProperty(key, value); if (key.equals(ENCODING)) { outputEncoding = value; } else if (key.equals(METHOD)) { htmlMode = value.equals("html"); } else if (key.equals(DOCTYPE_PUBLIC)) { overridePublic = value; forceDTD = true; } else if (key.equals(DOCTYPE_SYSTEM)) { overrideSystem = value; forceDTD = true; } else if (key.equals(VERSION)) { version = value; } else if (key.equals(STANDALONE)) { standalone = value; } } // ////////////////////////////////////////////////////////////////// // Linkifier code. // ////////////////////////////////////////////////////////////////// private static final String[] LINK_SCHEMAS = new String[]{"http://", "https://", "rtsp://"}; private static final MatchFilter URL_MATCH_FILTER = new MatchFilter() { public final boolean acceptMatch(CharSequence s, int start, int end) { return start == 0 || s.charAt(start - 1) != '@'; } }; private boolean mIgnoreChars; private StringBuffer mLastText4Links = new StringBuffer(); private void collectText4Links(char ch[], int start, int len) throws SAXException { mLastText4Links.append(String.valueOf(ch, start, len)); } private void writeText4Links() throws SAXException { if (mLastText4Links.length() > 0) { String text2Write = mLastText4Links.toString(); Writer tmp = output; output = new StringWriter(); Matcher m = Patterns.WEB_URL.matcher(mLastText4Links); int lastLinkEnd = 0; while (m.find()) { if (URL_MATCH_FILTER == null || URL_MATCH_FILTER.acceptMatch(mLastText4Links, m.start(), m.start())) { // write leading characters writeEscUTF16(text2Write, lastLinkEnd, m.start() - lastLinkEnd, false); // write link try { String linkText = m.group(0); String link = makeUrl(linkText, LINK_SCHEMAS, m); output.append("<a href=\"" + link + "\">"); writeEscUTF16(linkText, 0, linkText.length(), false); output.append("</a>"); } catch (IOException ignore) { } lastLinkEnd = m.end(); } } // write tailing characters if (lastLinkEnd < text2Write.length()) { writeEscUTF16(text2Write, lastLinkEnd, text2Write.length() - lastLinkEnd, false); } String text2WriteString = output.toString(); output = tmp; write(text2WriteString); mLastText4Links.setLength(0); } } private String makeUrl(String url, String[] prefixes, Matcher m) { boolean hasPrefix = false; for (int i = 0; i < prefixes.length; i++) { if (url.regionMatches(true, 0, prefixes[i], 0, prefixes[i].length())) { hasPrefix = true; // Fix capitalization if necessary if (!url.regionMatches(false, 0, prefixes[i], 0, prefixes[i].length())) { url = prefixes[i] + url.substring(prefixes[i].length()); } break; } } if (!hasPrefix) { url = prefixes[0] + url; } return url.replace("\u00a0", ""); // replace("\u00a0","") removes   } }