/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2011 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.sax;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Arrays;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
/**
 * Serializes a stream of SAX {@link ContentHandler} / {@link LexicalHandler}
 * events as HTML text written to a {@link Writer}.
 *
 * <p>Start and end tags are emitted only for elements in the XHTML, SVG and
 * MathML namespaces. Text inside the elements listed in
 * {@code NON_ESCAPING} is written verbatim; all other text has
 * {@code <}, {@code >}, {@code &} and U+00A0 replaced by character
 * references. Note that {@link #characters(char[], int, int)} does not consult
 * the ignore counter, so text nested in a dropped element is still written.
 */
public class HtmlSerializer implements ContentHandler, LexicalHandler {

    /**
     * Elements that are written without an end tag. Must stay in sorted order
     * because it is searched with {@link Arrays#binarySearch(Object[], Object)}.
     */
    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
            "bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
            "input", "keygen", "link", "meta", "param", "source", "track",
            "wbr" };

    /**
     * Elements whose text content is written without escaping. Must stay in
     * sorted order because it is searched with
     * {@link Arrays#binarySearch(Object[], Object)}.
     */
    private static final String[] NON_ESCAPING = { "iframe", "noembed",
            "noframes", "noscript", "plaintext", "script", "style", "xmp" };

    /**
     * Wraps a byte stream in a UTF-8 encoding writer.
     *
     * @param out the stream to wrap
     * @return a writer that encodes to {@code out} as UTF-8
     * @throws RuntimeException if the runtime reports UTF-8 as unsupported
     */
    private static Writer wrap(OutputStream out) {
        try {
            return new OutputStreamWriter(out, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Nesting counter; while positive, start and end tags are suppressed.
     * Also bumped after a void element's start tag so that the matching
     * {@code endElement} call does not emit an end tag.
     */
    private int ignoreLevel = 0;

    /**
     * Element depth counted from the outermost open {@code NON_ESCAPING}
     * element; while positive, text is written raw and comments are dropped.
     */
    private int escapeLevel = 0;

    private final Writer writer;

    /**
     * Creates a serializer that writes UTF-8 encoded output to a stream.
     *
     * @param out the destination stream
     */
    public HtmlSerializer(OutputStream out) {
        this(wrap(out));
    }

    /**
     * Creates a serializer that writes to the given writer.
     *
     * @param out the destination writer
     */
    public HtmlSerializer(Writer out) {
        this.writer = out;
    }

    /**
     * Writes character data. Inside a non-escaping element the characters are
     * copied verbatim; otherwise {@code <}, {@code >}, {@code &} and U+00A0
     * are replaced by {@code &lt;}, {@code &gt;}, {@code &amp;} and
     * {@code &nbsp;}.
     *
     * @param ch the buffer holding the characters
     * @param start index of the first character to write
     * @param length number of characters to write
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        try {
            if (escapeLevel > 0) {
                writer.write(ch, start, length);
            } else {
                for (int i = start; i < start + length; i++) {
                    char c = ch[i];
                    switch (c) {
                        case '<':
                            writer.write("&lt;");
                            break;
                        case '>':
                            writer.write("&gt;");
                            break;
                        case '&':
                            writer.write("&amp;");
                            break;
                        case '\u00A0':
                            writer.write("&nbsp;");
                            break;
                        default:
                            writer.write(c);
                            break;
                    }
                }
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Flushes and closes the underlying writer.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void endDocument() throws SAXException {
        try {
            writer.flush();
            writer.close();
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes an end tag unless the element is being ignored (which includes
     * void elements, see {@code startElement}).
     *
     * @param uri the element namespace URI (unused here)
     * @param localName the element local name, written as the tag name
     * @param qName the qualified name (unused here)
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        // Mirrors the unconditional increment at the top of startElement.
        if (escapeLevel > 0) {
            escapeLevel--;
        }
        if (ignoreLevel > 0) {
            ignoreLevel--;
        } else {
            try {
                writer.write('<');
                writer.write('/');
                writer.write(localName);
                writer.write('>');
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }
    }

    /**
     * Treats ignorable whitespace exactly like ordinary character data.
     *
     * @param ch the buffer holding the characters
     * @param start index of the first character to write
     * @param length number of characters to write
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        characters(ch, start, length);
    }

    /** Processing instructions are not serialized. */
    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    /** The document locator is not used. */
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Writes the HTML doctype followed by a newline.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void startDocument() throws SAXException {
        try {
            writer.write("<!DOCTYPE html>\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes a start tag with its serializable attributes.
     *
     * <p>Elements outside the XHTML, SVG and MathML namespaces, and elements
     * nested in an ignored element, only bump the ignore counter. Attributes
     * are written when they are in no namespace, in the XLink namespace on a
     * non-XHTML element (with an {@code xlink:} prefix), or in the XML
     * namespace ({@code xml:} prefix on non-XHTML elements; on XHTML elements
     * only {@code lang} is kept and written unprefixed). In attribute values
     * {@code "}, {@code &} and U+00A0 are replaced by {@code &quot;},
     * {@code &amp;} and {@code &nbsp;}.
     *
     * @param uri the element namespace URI
     * @param localName the element local name, written as the tag name
     * @param qName the qualified name (unused here)
     * @param atts the element's attributes
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        // Track depth below a non-escaping element so endElement can unwind it.
        if (escapeLevel > 0) {
            escapeLevel++;
        }
        boolean xhtml = "http://www.w3.org/1999/xhtml".equals(uri);
        if (ignoreLevel > 0
                || !(xhtml || "http://www.w3.org/2000/svg".equals(uri)
                        || "http://www.w3.org/1998/Math/MathML".equals(uri))) {
            ignoreLevel++;
            return;
        }
        try {
            writer.write('<');
            writer.write(localName);
            for (int i = 0; i < atts.getLength(); i++) {
                String attUri = atts.getURI(i);
                String attLocal = atts.getLocalName(i);
                if (attUri.length() == 0) {
                    writer.write(' ');
                } else if (!xhtml
                        && "http://www.w3.org/1999/xlink".equals(attUri)) {
                    writer.write(" xlink:");
                } else if ("http://www.w3.org/XML/1998/namespace".equals(attUri)) {
                    if (xhtml) {
                        if ("lang".equals(attLocal)) {
                            writer.write(' ');
                        } else {
                            continue;
                        }
                    } else {
                        writer.write(" xml:");
                    }
                } else {
                    // Attributes in any other namespace are dropped.
                    continue;
                }
                writer.write(attLocal);
                writer.write('=');
                writer.write('"');
                String val = atts.getValue(i);
                for (int j = 0; j < val.length(); j++) {
                    char c = val.charAt(j);
                    switch (c) {
                        case '"':
                            writer.write("&quot;");
                            break;
                        case '&':
                            writer.write("&amp;");
                            break;
                        case '\u00A0':
                            writer.write("&nbsp;");
                            break;
                        default:
                            writer.write(c);
                            break;
                    }
                }
                writer.write('"');
            }
            writer.write('>');
            if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) {
                // Makes the matching endElement call skip the end tag.
                ignoreLevel++;
                return;
            }
            if ("pre".equals(localName) || "textarea".equals(localName)
                    || "listing".equals(localName)) {
                // Extra newline written right after the start tag of these
                // elements.
                writer.write('\n');
            }
            if (escapeLevel == 0
                    && Arrays.binarySearch(NON_ESCAPING, localName) > -1) {
                escapeLevel = 1;
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes a comment, unless inside an ignored or non-escaping element.
     *
     * @param ch the buffer holding the comment text
     * @param start index of the first character of the comment
     * @param length number of characters in the comment
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void comment(char[] ch, int start, int length) throws SAXException {
        if (ignoreLevel > 0 || escapeLevel > 0) {
            return;
        }
        try {
            writer.write("<!--");
            writer.write(ch, start, length);
            writer.write("-->");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** CDATA section boundaries are not serialized. */
    public void endCDATA() throws SAXException {
    }

    /** DTD events are not serialized. */
    public void endDTD() throws SAXException {
    }

    /** Entity boundaries are not serialized. */
    public void endEntity(String name) throws SAXException {
    }

    /** CDATA section boundaries are not serialized. */
    public void startCDATA() throws SAXException {
    }

    /** DTD events are not serialized; the doctype is written by {@code startDocument}. */
    public void startDTD(String name, String publicId, String systemId)
            throws SAXException {
    }

    /** Entity boundaries are not serialized. */
    public void startEntity(String name) throws SAXException {
    }

    /** Prefix mappings are not serialized. */
    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    /** Prefix mappings are not serialized. */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /** Skipped entities are not serialized. */
    public void skippedEntity(String name) throws SAXException {
    }
}