/*
GNU GENERAL PUBLIC LICENSE
Copyright (C) 2006 The Lobo Project. Copyright (C) 2014 - 2017 Lobo Evolution
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact info: lobochief@users.sourceforge.net; ivan.difrancesco@yahoo.it
*/
/*
* Created on Oct 15, 2005
*/
package org.lobobrowser.html.parser;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import javax.xml.parsers.DocumentBuilder;
import org.lobobrowser.html.HtmlRendererContext;
import org.lobobrowser.html.domimpl.DOMImplementationImpl;
import org.lobobrowser.html.domimpl.HTMLDocumentImpl;
import org.lobobrowser.html.io.WritableLineReader;
import org.lobobrowser.http.UserAgentContext;
import org.lobobrowser.util.SSLCertificate;
import org.lobobrowser.util.Urls;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * The <code>DocumentBuilderImpl</code> class is an HTML DOM parser that
 * extends the standard JAXP <code>DocumentBuilder</code> abstract class.
 * <p>
 * The builder reports itself as neither namespace aware nor validating. An
 * entity resolver and an error handler can be registered; this class only
 * stores them and exposes them through {@link #getResolver()} and
 * {@link #getErrorHandler()}.
 *
 * @author J. H. S.
 */
public class DocumentBuilderImpl extends DocumentBuilder {
    /** The Constant logger. */
    private static final Logger logger = LogManager.getLogger(DocumentBuilderImpl.class.getName());

    /** Charset used for a caller-supplied byte stream when the input source declares no encoding. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** The entity resolver registered through {@link #setEntityResolver(EntityResolver)}. */
    private EntityResolver resolver;

    /** The error handler registered through {@link #setErrorHandler(ErrorHandler)}. */
    private ErrorHandler errorHandler;

    /** The user agent context handed to every document this builder creates. */
    private final UserAgentContext bcontext;

    /** The renderer context, or <code>null</code> when the builder is used for parsing only. */
    private final HtmlRendererContext rcontext;

    /** The lazily created DOM implementation; guarded by <code>this</code>. */
    private DOMImplementation domImplementation;

    /**
     * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be
     * used when only the parsing functionality (without rendering) is required.
     *
     * @param context
     *            An instance of {@link org.lobobrowser.http.UserAgentContext},
     *            which may be an instance of
     *            {@link org.lobobrowser.html.test.SimpleUserAgentContext}.
     */
    public DocumentBuilderImpl(UserAgentContext context) {
        this.rcontext = null;
        this.bcontext = context;
    }

    /**
     * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be
     * used when rendering is expected.
     *
     * @param ucontext
     *            An instance of {@link org.lobobrowser.http.UserAgentContext},
     *            which may be an instance of
     *            {@link org.lobobrowser.html.test.SimpleUserAgentContext}.
     * @param rcontext
     *            An instance of
     *            {@link org.lobobrowser.html.HtmlRendererContext}, which may be
     *            an instance of
     *            {@link org.lobobrowser.html.test.SimpleHtmlRendererContext}.
     */
    public DocumentBuilderImpl(UserAgentContext ucontext, HtmlRendererContext rcontext) {
        this.rcontext = rcontext;
        this.bcontext = ucontext;
    }

    /**
     * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be
     * used when rendering is expected. The user agent context is obtained from
     * the renderer context via <code>getUserAgentContext()</code>.
     *
     * @param rcontext
     *            An instance of
     *            {@link org.lobobrowser.html.HtmlRendererContext}, which may be
     *            an instance of
     *            {@link org.lobobrowser.html.test.SimpleHtmlRendererContext}.
     *            Must not be <code>null</code>, since it is dereferenced here.
     */
    public DocumentBuilderImpl(HtmlRendererContext rcontext) {
        this.rcontext = rcontext;
        this.bcontext = rcontext.getUserAgentContext();
    }

    /**
     * Parses an HTML document. The document is first created with
     * {@link #createDocument(InputSource)} and then loaded through
     * {@link HTMLDocumentImpl#load()} before it is returned, so (as noted by
     * the original author) the entire input source is read before this method
     * returns.
     *
     * @param is
     *            The input source, which may be an instance of
     *            {@link org.lobobrowser.html.parser.InputSourceImpl}.
     * @return the loaded document
     * @throws SAXException
     *             the SAX exception
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     * @throws IllegalArgumentException
     *             if <code>is</code> is <code>null</code>, or provides neither
     *             a reader, an input stream nor a system id
     * @see #createDocument(InputSource)
     */
    @Override
    public Document parse(InputSource is) throws org.xml.sax.SAXException, IOException {
        HTMLDocumentImpl document = (HTMLDocumentImpl) this.createDocument(is);
        document.load();
        return document;
    }

    /**
     * Creates a document without parsing the input provided, so the document
     * object can be used for incremental rendering.
     * <p>
     * The character source is chosen in this order: the input source's
     * character stream, then its byte stream (decoded with the declared
     * encoding, or UTF-8 when none is declared), then a connection opened to
     * its system id.
     *
     * @param is
     *            The input source, which may be an instance of
     *            {@link org.lobobrowser.html.parser.InputSourceImpl}. The input
     *            source must provide either a reader, an input stream or a
     *            system id (URI).
     * @return the document, not yet loaded
     * @throws SAXException
     *             the SAX exception
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     * @throws IllegalArgumentException
     *             if <code>is</code> is <code>null</code>, or provides neither
     *             a reader, an input stream nor a system id
     * @see HTMLDocumentImpl#load()
     */
    public Document createDocument(InputSource is) throws SAXException, IOException {
        // The JAXP contract for parse(InputSource) specifies
        // IllegalArgumentException for a null source; fail with a clear
        // message instead of a bare NullPointerException below.
        if (is == null) {
            throw new IllegalArgumentException("The InputSource must not be null.");
        }
        String encoding = is.getEncoding();
        String uri = is.getSystemId();
        if (uri == null) {
            logger.warn("parse(): InputSource has no SystemId (URI); document item URLs will not be resolvable.");
        }
        WritableLineReader wis;
        Reader reader = is.getCharacterStream();
        if (reader != null) {
            // A reader already yields characters, so no charset is needed.
            wis = new WritableLineReader(reader);
        } else {
            InputStream in = is.getByteStream();
            if (in != null) {
                // The stream belongs to the caller; it is not closed here if
                // decoding setup fails.
                String charset = encoding != null ? encoding : DEFAULT_CHARSET;
                wis = new WritableLineReader(new InputStreamReader(in, charset));
            } else if (uri != null) {
                wis = openFromUri(uri, encoding);
            } else {
                throw new IllegalArgumentException(
                        "The InputSource must have either a reader, an input stream or a URI.");
            }
        }
        return new HTMLDocumentImpl(this.bcontext, this.rcontext, wis, uri);
    }

    /**
     * Opens a connection to the given URI and wraps its content in a line reader.
     * If wrapping fails, the stream opened here is closed before the failure
     * propagates, so it does not leak.
     *
     * @param uri
     *            the location to read from
     * @param encoding
     *            the encoding declared by the input source, or
     *            <code>null</code> to use the charset that
     *            <code>Urls.getCharset</code> reports for the connection
     * @return a reader over the content at <code>uri</code>
     * @throws IOException
     *             if the connection cannot be opened or the charset is not
     *             supported
     */
    private static WritableLineReader openFromUri(String uri, String encoding) throws IOException {
        // NOTE(review): presumably prepares certificate handling for HTTPS
        // connections - confirm against SSLCertificate.
        SSLCertificate.setCertificate();
        URLConnection connection = new URL(uri).openConnection();
        InputStream in = connection.getInputStream();
        boolean handedOff = false;
        try {
            String charset = encoding != null ? encoding : Urls.getCharset(connection);
            WritableLineReader wis = new WritableLineReader(new InputStreamReader(in, charset));
            handedOff = true;
            return wis;
        } finally {
            if (!handedOff) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Deliberately ignored so the original failure is the
                    // one reported to the caller.
                }
            }
        }
    }

    /**
     * Indicates whether this builder is namespace aware.
     *
     * @return always <code>false</code>
     * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware()
     */
    @Override
    public boolean isNamespaceAware() {
        return false;
    }

    /**
     * Indicates whether this builder validates documents.
     *
     * @return always <code>false</code>
     * @see javax.xml.parsers.DocumentBuilder#isValidating()
     */
    @Override
    public boolean isValidating() {
        return false;
    }

    /**
     * Stores the entity resolver so it can be retrieved with
     * {@link #getResolver()}.
     *
     * @param er
     *            the entity resolver, possibly <code>null</code>
     * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver)
     */
    @Override
    public void setEntityResolver(EntityResolver er) {
        this.resolver = er;
    }

    /**
     * Stores the error handler so it can be retrieved with
     * {@link #getErrorHandler()}.
     *
     * @param eh
     *            the error handler, possibly <code>null</code>
     * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler)
     */
    @Override
    public void setErrorHandler(ErrorHandler eh) {
        this.errorHandler = eh;
    }

    /**
     * Creates a new document bound to this builder's user agent context.
     *
     * @return a new {@link HTMLDocumentImpl}
     * @see javax.xml.parsers.DocumentBuilder#newDocument()
     */
    @Override
    public Document newDocument() {
        return new HTMLDocumentImpl(this.bcontext);
    }

    /**
     * Returns the DOM implementation, creating it on first use. Creation is
     * synchronized on this builder, so every caller receives the same
     * instance.
     *
     * @return the DOM implementation
     * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation()
     */
    @Override
    public DOMImplementation getDOMImplementation() {
        synchronized (this) {
            if (this.domImplementation == null) {
                this.domImplementation = new DOMImplementationImpl(this.bcontext);
            }
            return this.domImplementation;
        }
    }

    /**
     * Gets the error handler.
     *
     * @return the error handler, or <code>null</code> if none has been set
     */
    public ErrorHandler getErrorHandler() {
        return errorHandler;
    }

    /**
     * Gets the resolver.
     *
     * @return the entity resolver, or <code>null</code> if none has been set
     */
    public EntityResolver getResolver() {
        return resolver;
    }
}