/*
 GNU LESSER GENERAL PUBLIC LICENSE
 Copyright (C) 2006 The Lobo Project

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

 Contact info: lobochief@users.sourceforge.net
 */
/*
 * Created on Oct 15, 2005
 */
package org.lobobrowser.html.parser;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.logging.Logger;

import javax.xml.parsers.DocumentBuilder;

import org.lobobrowser.html.HtmlRendererContext;
import org.lobobrowser.html.domimpl.DOMImplementationImpl;
import org.lobobrowser.html.domimpl.HTMLDocumentImpl;
import org.lobobrowser.html.io.WritableLineReader;
import org.lobobrowser.ua.UserAgentContext;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * The <code>DocumentBuilderImpl</code> class is an HTML DOM parser that
 * implements the standard W3C <code>DocumentBuilder</code> interface.
 *
 * @author J. H. S.
 */
public class DocumentBuilderImpl extends DocumentBuilder {
  private static final Logger logger = Logger.getLogger(DocumentBuilderImpl.class.getName());

  /** Charset used to decode a byte stream when the input source declares no encoding. */
  private static final String DEFAULT_CHARSET = "US-ASCII";

  private final UserAgentContext bcontext;
  private final HtmlRendererContext rcontext;
  private EntityResolver resolver;
  private ErrorHandler errorHandler;

  /** Lazily created by {@link #getDOMImplementation()}; guarded by <code>this</code>. */
  private DOMImplementation domImplementation;

  /**
   * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be
   * used when only the parsing functionality (without rendering) is required.
   *
   * @param context
   *          An instance of {@link org.lobobrowser.ua.UserAgentContext},
   *          which may be an instance of
   *          {@link org.lobobrowser.html.test.SimpleUserAgentContext}.
   */
  public DocumentBuilderImpl(final UserAgentContext context) {
    this.rcontext = null;
    this.bcontext = context;
  }

  /**
   * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be
   * used when rendering is expected.
   *
   * @param ucontext
   *          An instance of {@link org.lobobrowser.ua.UserAgentContext},
   *          which may be an instance of
   *          {@link org.lobobrowser.html.test.SimpleUserAgentContext}.
   * @param rcontext
   *          An instance of {@link org.lobobrowser.html.HtmlRendererContext},
   *          which may be an instance of
   *          {@link org.lobobrowser.html.test.SimpleHtmlRendererContext}.
   */
  public DocumentBuilderImpl(final UserAgentContext ucontext, final HtmlRendererContext rcontext) {
    this.rcontext = rcontext;
    this.bcontext = ucontext;
  }

  /**
   * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be
   * used when rendering is expected. The user agent context is obtained from
   * the renderer context.
   *
   * @param rcontext
   *          An instance of {@link org.lobobrowser.html.HtmlRendererContext},
   *          which may be an instance of
   *          {@link org.lobobrowser.html.test.SimpleHtmlRendererContext}.
   *          Must not be <code>null</code>, since it is dereferenced here.
   */
  public DocumentBuilderImpl(final HtmlRendererContext rcontext) {
    this.rcontext = rcontext;
    this.bcontext = rcontext.getUserAgentContext();
  }

  /**
   * Parses an HTML document. Note that this method will read the entire input
   * source before returning a <code>Document</code> instance.
   *
   * @param is
   *          The input source, which may be an instance of
   *          {@link org.lobobrowser.html.parser.InputSourceImpl}.
   * @return The loaded document.
   * @throws IllegalArgumentException
   *           If <code>is</code> is <code>null</code>, or provides neither a
   *           character stream nor a byte stream.
   * @see #createDocument(InputSource, String)
   */
  @Override
  public Document parse(final InputSource is) throws org.xml.sax.SAXException, IOException {
    final HTMLDocumentImpl document = (HTMLDocumentImpl) this.createDocument(is, "");
    document.load();
    return document;
  }

  /**
   * Creates a document without parsing the input provided, so the document
   * object can be used for incremental rendering.
   *
   * @param is
   *          The input source, which may be an instance of
   *          {@link org.lobobrowser.html.parser.InputSourceImpl}. The input
   *          source must provide either an input stream or a reader.
   * @param contentType
   *          The content type, handed unchanged to the
   *          {@link HTMLDocumentImpl} constructor.
   * @return A new, not yet loaded, {@link HTMLDocumentImpl}.
   * @throws IllegalArgumentException
   *           If <code>is</code> is <code>null</code>, or provides neither a
   *           character stream nor a byte stream.
   * @throws IOException
   *           If the reader over the byte stream cannot be created, e.g.
   *           because the declared encoding is not supported.
   * @throws SAXException
   *           Declared for parity with {@link #parse(InputSource)}.
   * @see HTMLDocumentImpl#load()
   */
  public Document createDocument(final InputSource is, final String contentType) throws SAXException, IOException {
    // The DocumentBuilder contract asks for IllegalArgumentException (not a
    // NullPointerException) when the source is null.
    if (is == null) {
      throw new IllegalArgumentException("InputSource cannot be null");
    }
    final String declaredEncoding = is.getEncoding();
    final String charset = declaredEncoding == null ? DEFAULT_CHARSET : declaredEncoding;
    final String uri = is.getSystemId();
    if (uri == null) {
      logger.warning("parse(): InputSource has no SystemId (URI); document item URLs will not be resolvable.");
    }
    final WritableLineReader wis = openReader(is, charset, uri);
    return new HTMLDocumentImpl(this.bcontext, this.rcontext, wis, uri, contentType);
  }

  /**
   * Wraps the character stream of the input source, or else its byte stream
   * decoded with <code>charset</code>, in a {@link WritableLineReader}.
   */
  private static WritableLineReader openReader(final InputSource is, final String charset, final String uri)
      throws IOException {
    // A character stream takes precedence over a byte stream.
    final Reader reader = is.getCharacterStream();
    if (reader != null) {
      return new WritableLineReader(reader);
    }
    final InputStream in = is.getByteStream();
    if (in != null) {
      return new WritableLineReader(new InputStreamReader(in, charset));
    }
    if (uri != null) {
      // A system id on its own is not enough: this builder does not open the
      // URI itself, so the caller has to supply a stream or a reader.
      throw new IllegalArgumentException("The input source didn't have a character stream, nor an inputstream!");
    }
    throw new IllegalArgumentException("The InputSource must have either a reader, an input stream or a URI.");
  }

  /**
   * @return Always <code>false</code>.
   */
  @Override
  public boolean isNamespaceAware() {
    return false;
  }

  /**
   * @return Always <code>false</code>.
   */
  @Override
  public boolean isValidating() {
    return false;
  }

  /**
   * Stores the entity resolver so that it can be retrieved with
   * {@link #getResolver()}. This class does not otherwise consult it.
   *
   * @param er
   *          The entity resolver; may be <code>null</code>.
   */
  @Override
  public void setEntityResolver(final EntityResolver er) {
    this.resolver = er;
  }

  /**
   * Stores the error handler so that it can be retrieved with
   * {@link #getErrorHandler()}. This class does not otherwise consult it.
   *
   * @param eh
   *          The error handler; may be <code>null</code>.
   */
  @Override
  public void setErrorHandler(final ErrorHandler eh) {
    this.errorHandler = eh;
  }

  /**
   * Creates a new {@link HTMLDocumentImpl} bound to the user agent context of
   * this builder.
   *
   * @return A new document instance.
   */
  @Override
  public Document newDocument() {
    return new HTMLDocumentImpl(this.bcontext);
  }

  /**
   * Returns the DOM implementation of this builder, creating it on first use.
   * Creation is synchronized on this builder, so every caller gets the same
   * instance.
   *
   * @return The {@link DOMImplementation} instance.
   */
  @Override
  public DOMImplementation getDOMImplementation() {
    synchronized (this) {
      if (this.domImplementation == null) {
        this.domImplementation = new DOMImplementationImpl(this.bcontext);
      }
      return this.domImplementation;
    }
  }

  /**
   * @return The error handler last passed to
   *         {@link #setErrorHandler(ErrorHandler)}, or <code>null</code> if
   *         none was set.
   */
  public ErrorHandler getErrorHandler() {
    return errorHandler;
  }

  /**
   * @return The entity resolver last passed to
   *         {@link #setEntityResolver(EntityResolver)}, or <code>null</code>
   *         if none was set.
   */
  public EntityResolver getResolver() {
    return resolver;
  }
}