/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.impl.ElementName;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.HtmlAttributes;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.io.Driver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class TokenPrinter implements TokenHandler, ErrorHandler {
private final Writer writer;
public void characters(char[] buf, int start, int length)
throws SAXException {
try {
boolean lineStarted = true;
writer.write('-');
for (int i = start; i < start + length; i++) {
if (!lineStarted) {
writer.write("\n-");
lineStarted = true;
}
char c = buf[i];
if (c == '\n') {
writer.write("\\n");
lineStarted = false;
} else {
writer.write(c);
}
}
writer.write('\n');
} catch (IOException e) {
throw new SAXException(e);
}
}
public void comment(char[] buf, int start, int length) throws SAXException {
try {
writer.write('!');
writer.write(buf, start, length);
writer.write('\n');
} catch (IOException e) {
throw new SAXException(e);
}
}
public void doctype(String name, String publicIdentifier, String systemIdentifier, boolean forceQuirks) throws SAXException {
try {
writer.write('D');
writer.write(name);
writer.write(' ');
writer.write("" + forceQuirks);
writer.write('\n');
} catch (IOException e) {
throw new SAXException(e);
}
}
public void endTag(ElementName eltName) throws SAXException {
try {
writer.write(')');
writer.write(eltName.name);
writer.write('\n');
} catch (IOException e) {
throw new SAXException(e);
}
}
public void eof() throws SAXException {
try {
writer.write("E\n");
} catch (IOException e) {
throw new SAXException(e);
}
}
public void startTokenization(Tokenizer self) throws SAXException {
}
public void startTag(ElementName eltName, HtmlAttributes attributes, boolean selfClosing)
throws SAXException {
try {
writer.write('(');
writer.write(eltName.name);
writer.write('\n');
for (int i = 0; i < attributes.getLength(); i++) {
writer.write('A');
writer.write(attributes.getQNameNoBoundsCheck(i));
writer.write(' ');
writer.write(attributes.getValueNoBoundsCheck(i));
writer.write('\n');
}
} catch (IOException e) {
throw new SAXException(e);
}
}
public boolean wantsComments() throws SAXException {
return true;
}
public static void main(String[] args) throws SAXException, IOException {
TokenPrinter printer = new TokenPrinter(new OutputStreamWriter(System.out, "UTF-8"));
Driver tokenizer = new Driver(new ErrorReportingTokenizer(printer));
tokenizer.setErrorHandler(printer);
File file = new File(args[0]);
InputSource is = new InputSource(new FileInputStream(file));
is.setSystemId(file.toURI().toASCIIString());
tokenizer.tokenize(is);
}
/**
* @param writer
*/
public TokenPrinter(final Writer writer) {
this.writer = writer;
}
public void error(SAXParseException exception) throws SAXException {
try {
writer.write("R ");
writer.write(exception.getMessage());
writer.write("\n");
} catch (IOException e) {
throw new SAXException(e);
}
}
public void fatalError(SAXParseException exception) throws SAXException {
try {
writer.write("F ");
writer.write(exception.getMessage());
writer.write("\n");
} catch (IOException e) {
throw new SAXException(e);
}
}
public void warning(SAXParseException exception) throws SAXException {
try {
writer.write("W ");
writer.write(exception.getMessage());
writer.write("\n");
} catch (IOException e) {
throw new SAXException(e);
}
}
public void endTokenization() throws SAXException {
try {
writer.flush();
writer.close();
} catch (IOException e) {
throw new SAXException(e);
}
}
@Override public void zeroOriginatingReplacementCharacter()
throws SAXException {
try {
writer.write("0\n");
} catch (IOException e) {
throw new SAXException(e);
}
}
@Override public boolean cdataSectionAllowed() throws SAXException {
return false;
}
}