/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.tools;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TemplatesHandler;
import javax.xml.transform.sax.TransformerHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.htmlparser.sax.HtmlSerializer;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.htmlparser.test.SystemErrErrorHandler;
import org.w3c.dom.Document;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
public class XSLT4HTML5 {
private enum Mode {
STREAMING_SAX, BUFFERED_SAX, DOM,
}
private static final String TEMPLATE = "--template=";
private static final String INPUT_HTML = "--input-html=";
private static final String INPUT_XML = "--input-xml=";
private static final String OUTPUT_HTML = "--output-html=";
private static final String OUTPUT_XML = "--output-xml=";
private static final String MODE = "--mode=";
/**
* @param args
* @throws ParserConfigurationException
* @throws SAXException
* @throws IOException
* @throws MalformedURLException
* @throws TransformerException
*/
public static void main(String[] args) throws SAXException,
ParserConfigurationException, MalformedURLException, IOException, TransformerException {
if (args.length == 0) {
System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]");
System.exit(0);
}
String template = null;
String input = null;
boolean inputHtml = false;
String output = null;
boolean outputHtml = false;
Mode mode = null;
for (int i = 0; i < args.length; i++) {
String arg = args[i];
if (arg.startsWith(TEMPLATE)) {
if (template == null) {
template = arg.substring(TEMPLATE.length());
} else {
System.err.println("Tried to set template twice.");
System.exit(1);
}
} else if (arg.startsWith(INPUT_HTML)) {
if (input == null) {
input = arg.substring(INPUT_HTML.length());
inputHtml = true;
} else {
System.err.println("Tried to set input twice.");
System.exit(2);
}
} else if (arg.startsWith(INPUT_XML)) {
if (input == null) {
input = arg.substring(INPUT_XML.length());
inputHtml = false;
} else {
System.err.println("Tried to set input twice.");
System.exit(2);
}
} else if (arg.startsWith(OUTPUT_HTML)) {
if (output == null) {
output = arg.substring(OUTPUT_HTML.length());
outputHtml = true;
} else {
System.err.println("Tried to set output twice.");
System.exit(3);
}
} else if (arg.startsWith(OUTPUT_XML)) {
if (output == null) {
output = arg.substring(OUTPUT_XML.length());
outputHtml = false;
} else {
System.err.println("Tried to set output twice.");
System.exit(3);
}
} else if (arg.startsWith(MODE)) {
if (mode == null) {
String modeStr = arg.substring(MODE.length());
if ("dom".equals(modeStr)) {
mode = Mode.DOM;
} else if ("sax-buffered".equals(modeStr)) {
mode = Mode.BUFFERED_SAX;
} else if ("sax-streaming".equals(modeStr)) {
mode = Mode.STREAMING_SAX;
} else {
System.err.println("Unrecognized mode.");
System.exit(5);
}
} else {
System.err.println("Tried to set mode twice.");
System.exit(4);
}
}
}
if (template == null) {
System.err.println("No template specified.");
System.exit(6);
}
if (input == null) {
System.err.println("No input specified.");
System.exit(7);
}
if (output == null) {
System.err.println("No output specified.");
System.exit(8);
}
if (mode == null) {
mode = Mode.BUFFERED_SAX;
}
SystemErrErrorHandler errorHandler = new SystemErrErrorHandler();
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(false);
XMLReader reader = factory.newSAXParser().getXMLReader();
reader.setErrorHandler(errorHandler);
SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
transformerFactory.setErrorListener(errorHandler);
TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler();
reader.setContentHandler(templatesHandler);
reader.parse(new File(template).toURI().toASCIIString());
Templates templates = templatesHandler.getTemplates();
FileOutputStream outputStream = new FileOutputStream(output);
ContentHandler serializer;
if (outputHtml) {
serializer = new HtmlSerializer(outputStream);
} else {
serializer = new XmlSerializer(outputStream);
}
SAXResult result = new SAXResult(new XmlnsDropper(serializer));
result.setLexicalHandler((LexicalHandler) serializer);
if (mode == Mode.DOM) {
Document inputDoc;
DocumentBuilder builder;
if (inputHtml) {
builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET);
} else {
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
try {
builder = builderFactory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
inputDoc = builder.parse(new File(input));
DOMSource inputSource = new DOMSource(inputDoc,
new File(input).toURI().toASCIIString());
Transformer transformer = templates.newTransformer();
transformer.setErrorListener(errorHandler);
transformer.transform(inputSource, result);
} else {
if (inputHtml) {
reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
if (mode == Mode.STREAMING_SAX) {
reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL);
}
}
TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates);
transformerHandler.setResult(result);
reader.setErrorHandler(errorHandler);
reader.setContentHandler(transformerHandler);
reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler);
reader.parse(new File(input).toURI().toASCIIString());
}
outputStream.flush();
outputStream.close();
}
}