/*
* #!
* Ontopia Engine
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.xml;
import java.io.IOException;
import java.net.URL;
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.FileWriter;
import org.w3c.tidy.Tidy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Generates tidied HTML (or XML) from the document found at a URL and
 * writes the result to a file, using JTidy.
 *
 * <p>The class implements {@link Runnable}, so several instances can be
 * run on separate threads; each instance creates its own {@link Tidy}
 * object and opens its own streams inside {@link #run()}.</p>
 */
public class TidyHTML implements Runnable {

  private final String strUrl;
  private final String outFileName;
  private final String errOutFileName;
  private final boolean xmlOut;

  // Bound to the runtime class so that subclasses log under their own name.
  protected Logger log = LoggerFactory.getLogger(this.getClass());

  /**
   * Creates a tidy job. Nothing is opened or written until {@link #run()}
   * (or {@link #tidify()}) is called.
   *
   * @param strUrl URL of the document that should be parsed
   * @param outFileName Name of the file the pretty-printed result is
   *        written to
   * @param errOutFileName Name of the file for writing out errors/warnings
   * @param xmlOut generate XML output, otherwise HTML
   */
  public TidyHTML(String strUrl, String outFileName,
                  String errOutFileName, boolean xmlOut) {
    this.strUrl = strUrl;
    this.outFileName = outFileName;
    this.errOutFileName = errOutFileName;
    this.xmlOut = xmlOut;
  }

  /**
   * Starts the tidification: reads the document from the URL, lets JTidy
   * parse it, and writes the result to the output file. JTidy's
   * errors/warnings go to the error file.
   *
   * <p>An {@link IOException} is not propagated; it is logged as a
   * warning. All streams opened here are closed before the method
   * returns, whether or not parsing succeeded.</p>
   */
  @Override
  public void run() {
    Tidy tidy = new Tidy();
    tidy.setXmlOut(xmlOut);

    // Resources are opened in the same order as before (error log, input,
    // output), so a failing URL still does not create the output file.
    // try-with-resources closes them in reverse order on every exit path.
    try (PrintWriter errWriter =
             new PrintWriter(new FileWriter(errOutFileName), true)) {
      tidy.setErrout(errWriter);
      URL url = new URL(strUrl);
      try (BufferedInputStream in = new BufferedInputStream(url.openStream());
           FileOutputStream out = new FileOutputStream(outFileName)) {
        tidy.parse(in, out);
      }
    } catch (IOException e) {
      // Pass the exception itself so the stack trace is not lost.
      log.warn("Failed to tidy " + strUrl + " into " + outFileName, e);
    }
  }

  /**
   * Convenience wrapper that runs this job on a new thread and blocks
   * until it has finished.
   *
   * <p>If the calling thread is interrupted while waiting, the method
   * keeps waiting for the job to finish and then restores the caller's
   * interrupt status before returning.</p>
   */
  public void tidify() {
    Thread thread = new Thread(this);
    thread.start();

    boolean interrupted = false;
    while (true) {
      try {
        thread.join();
        break;
      } catch (InterruptedException e) {
        // Remember the interrupt, but keep waiting: the contract of this
        // method is to return only after the job has finished.
        interrupted = true;
      }
    }
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }

  /**
   * Only for test purposes. Expects three arguments: the source URL, the
   * output file name and the error file name. Output is generated as XML.
   */
  public static void main(String[] args) {
    if (args.length < 3) {
      System.err.println(
          "Usage: TidyHTML <url> <output-file> <error-output-file>");
      return;
    }
    TidyHTML t = new TidyHTML(args[0], args[1], args[2], true);
    t.tidify();
  }
}