package com.dappit.Dapper.parser;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import org.dom4j.DocumentException;
import org.w3c.dom.Document;
/**
 * Contract for HTML parsers that turn a raw byte array into a W3C DOM
 * {@link Document}.
 *
 * @author Tony Novak
 */
public interface HTMLParser {

    /**
     * Parses an HTML document and returns it as a DOM document.
     *
     * @param htmlBytes
     *            raw bytes of the HTML document to parse
     * @param htmlEncoding
     *            character set (e.g. {@code UTF-8}) used to decode
     *            {@code htmlBytes}. When {@code null} is passed, the default
     *            HTTP encoding ({@code ISO-8859-1}) is used, unless the
     *            character set is overridden by a {@code <META>} tag in the
     *            HTML body. When a non-null value is passed, any such
     *            {@code <META>} tag is ignored.
     * @param optinalOutputStream
     *            optional file output stream; what is written to it is
     *            defined by the implementation. NOTE(review): the name
     *            suggests {@code null} is accepted -- confirm against the
     *            implementations.
     * @return the DOM document built from the HTML input
     * @throws DocumentException
     *             declared by the contract; presumably raised when dom4j
     *             processing fails -- confirm against the implementations
     * @throws ParserException
     *             declared by the contract; presumably raised when the HTML
     *             cannot be parsed -- confirm against the implementations
     * @throws ParserConfigurationException
     *             declared by the contract; presumably raised when the
     *             underlying XML parser cannot be configured -- confirm
     *             against the implementations
     * @throws IOException
     *             declared by the contract; presumably raised on I/O failure
     *             while reading input or writing to the output stream --
     *             confirm against the implementations
     */
    Document parse(
            byte[] htmlBytes,
            String htmlEncoding,
            FileOutputStream optinalOutputStream)
            throws DocumentException,
                   ParserException,
                   ParserConfigurationException,
                   IOException;
}