/*
* GNU LESSER GENERAL PUBLIC LICENSE Copyright (C) 2006 The Lobo Project
*
* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* Contact info: xamjadmin@users.sourceforge.net
*/
/*
* Created on Oct 15, 2005
*/
package org.cobra_grendel.html.parser;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import javax.xml.parsers.DocumentBuilder;
import org.apache.commons.lang.NotImplementedException;
import org.cobra_grendel.html.HtmlParserContext;
import org.cobra_grendel.html.HtmlRendererContext;
import org.cobra_grendel.html.UserAgentContext;
import org.cobra_grendel.html.domimpl.HTMLDocumentImpl;
import org.cobra_grendel.html.io.WritableLineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * The <code>DocumentBuilderImpl</code> class is an HTML DOM parser that extends the standard JAXP {@link DocumentBuilder} class.
 * <p>
 * Only the transaction-aware entry points {@link #createDocument(InputSource, int)} and {@link #parse(InputSource, int)} are implemented. The inherited
 * operations {@link #parse(InputSource)}, {@link #newDocument()} and {@link #getDOMImplementation()} currently throw {@link NotImplementedException}.
 *
 * @author J. H. S.
 */
public class DocumentBuilderImpl extends DocumentBuilder
{
    private static final Logger LOGGER = LoggerFactory.getLogger(DocumentBuilderImpl.class);

    /** User agent context handed to every document created by this builder. */
    private final UserAgentContext bcontext;

    /** Last handler passed to {@link #setErrorHandler(ErrorHandler)}; only stored and returned by this class. */
    private ErrorHandler errorHandler;

    /** Renderer context handed to every document created by this builder; <code>null</code> for the parsing-only constructors. */
    private final HtmlRendererContext rcontext;

    /** Last resolver passed to {@link #setEntityResolver(EntityResolver)}; only stored and returned by this class. */
    private EntityResolver resolver;

    /**
     * @param context
     *            The parser context whose {@link HtmlParserContext#getUserAgentContext()} result is used as the user agent context.
     * @deprecated HtmlParserContext is no longer used.
     * @see #DocumentBuilderImpl(UserAgentContext)
     */
    @Deprecated
    public DocumentBuilderImpl(final HtmlParserContext context)
    {
        rcontext = null;
        bcontext = context.getUserAgentContext();
    }

    /**
     * @param context
     *            The parser context whose {@link HtmlParserContext#getUserAgentContext()} result is used as the user agent context.
     * @param rcontext
     *            The renderer context passed on to created documents.
     * @deprecated HtmlParserContext is no longer used.
     * @see #DocumentBuilderImpl(UserAgentContext,HtmlRendererContext)
     */
    @Deprecated
    public DocumentBuilderImpl(final HtmlParserContext context, final HtmlRendererContext rcontext)
    {
        this.rcontext = rcontext;
        bcontext = context.getUserAgentContext();
    }

    /**
     * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be used when only the parsing functionality (without rendering) is required.
     *
     * @param context
     *            An instance of {@link org.cobra_grendel.html.UserAgentContext}, which may be an instance of {@link org.cobra_grendel.html.test.SimpleUserAgentContext}.
     */
    public DocumentBuilderImpl(final UserAgentContext context)
    {
        rcontext = null;
        bcontext = context;
    }

    /**
     * Constructs a <code>DocumentBuilderImpl</code>. This constructor should be used when rendering is expected.
     *
     * @param ucontext
     *            An instance of {@link org.cobra_grendel.html.UserAgentContext}, which may be an instance of {@link org.cobra_grendel.html.test.SimpleUserAgentContext}.
     * @param rcontext
     *            An instance of {@link org.cobra_grendel.html.HtmlRendererContext}, which may be an instance of {@link org.cobra_grendel.html.test.SimpleHtmlRendererContext}.
     */
    public DocumentBuilderImpl(final UserAgentContext ucontext, final HtmlRendererContext rcontext)
    {
        this.rcontext = rcontext;
        bcontext = ucontext;
    }

    /**
     * Creates a document without parsing it so it can be used for incremental rendering.
     * <p>
     * The byte stream of the input source is preferred; it is decoded with the encoding reported by the input source, or <code>US-ASCII</code> when none
     * is set. The character stream is used only when there is no byte stream. A missing system ID is tolerated but logged as a warning.
     *
     * @param is
     *            The input source, which may be an instance of {@link org.cobra_grendel.html.parser.InputSourceImpl}.
     * @param transactionId
     *            An identifier passed through unchanged to the created {@link HTMLDocumentImpl}.
     * @return A new, not yet loaded {@link HTMLDocumentImpl}.
     * @throws IllegalArgumentException
     *             If <code>is</code> is <code>null</code>, or if it has neither a byte stream nor a character stream.
     * @throws IOException
     *             If the reader for the byte stream cannot be created, e.g. because the encoding is not supported.
     * @throws SAXException
     *             Declared for API compatibility; not thrown directly by this method.
     */
    public Document createDocument(final InputSource is, final int transactionId) throws SAXException, IOException
    {
        // Fail with the exception type the DocumentBuilder contract specifies for a null source instead of a bare NullPointerException.
        if (is == null)
        {
            throw new IllegalArgumentException("InputSource cannot be null");
        }
        String charset = is.getEncoding();
        if (charset == null)
        {
            charset = "US-ASCII";
        }
        final String uri = is.getSystemId();
        if (uri == null)
        {
            LOGGER.warn("parse(): InputSource has no SystemId (URI); document item URLs will not be resolvable.");
        }
        final WritableLineReader wis;
        final InputStream in = is.getByteStream();
        if (in != null)
        {
            wis = new WritableLineReader(new InputStreamReader(in, charset));
        }
        else
        {
            final Reader reader = is.getCharacterStream();
            if (reader == null)
            {
                throw new IllegalArgumentException("InputSource has neither a byte stream nor a character stream");
            }
            wis = new WritableLineReader(reader);
        }
        return new HTMLDocumentImpl(bcontext, rcontext, wis, uri, transactionId);
    }

    // TODO: parseAsync

    /**
     * Not implemented.
     *
     * @throws NotImplementedException
     *             Always.
     */
    @Override
    public DOMImplementation getDOMImplementation()
    {
        // TODO: an earlier draft lazily created a DOMImplementationImpl(bcontext) under synchronization; that code was disabled.
        throw new NotImplementedException();
    }

    /**
     * @return The handler last passed to {@link #setErrorHandler(ErrorHandler)}, or <code>null</code> if none was set.
     */
    public ErrorHandler getErrorHandler()
    {
        return errorHandler;
    }

    /**
     * @return The resolver last passed to {@link #setEntityResolver(EntityResolver)}, or <code>null</code> if none was set.
     */
    public EntityResolver getResolver()
    {
        return resolver;
    }

    /**
     * @return Always <code>false</code>.
     */
    @Override
    public boolean isNamespaceAware()
    {
        return false;
    }

    /**
     * @return Always <code>false</code>.
     */
    @Override
    public boolean isValidating()
    {
        return false;
    }

    /**
     * Not implemented.
     *
     * @throws NotImplementedException
     *             Always.
     */
    @Override
    public Document newDocument()
    {
        // TODO: an earlier draft returned new HTMLDocumentImpl(bcontext); that code was disabled.
        throw new NotImplementedException();
    }

    /**
     * Not implemented; use {@link #parse(InputSource, int)} instead.
     *
     * @param is
     *            Ignored.
     * @throws NotImplementedException
     *             Always.
     */
    @Override
    public Document parse(final InputSource is) throws org.xml.sax.SAXException, IOException
    {
        throw new NotImplementedException();
    }

    /**
     * Parses an HTML document given as an <code>InputSource</code>: creates the document via {@link #createDocument(InputSource, int)} and then calls
     * <code>load()</code> on it.
     *
     * @param is
     *            The input source, which may be an instance of {@link org.cobra_grendel.html.parser.InputSourceImpl}.
     * @param transactionId
     *            An identifier passed through unchanged to the created document.
     * @return The loaded document.
     * @throws IllegalArgumentException
     *             If <code>is</code> is <code>null</code>, or if it has neither a byte stream nor a character stream.
     * @throws org.xml.sax.SAXException
     *             If creating or loading the document reports one.
     * @throws IOException
     *             If creating or loading the document reports one.
     */
    public Document parse(final InputSource is, final int transactionId) throws org.xml.sax.SAXException, IOException
    {
        // Goes through createDocument() so that subclasses overriding it keep affecting parse().
        final HTMLDocumentImpl document = (HTMLDocumentImpl) createDocument(is, transactionId);
        document.load();
        return document;
    }

    /**
     * Stores the given resolver; this class itself only returns it from {@link #getResolver()}.
     *
     * @param er
     *            The resolver to store; may be <code>null</code>.
     */
    @Override
    public void setEntityResolver(final EntityResolver er)
    {
        resolver = er;
    }

    /**
     * Stores the given handler; this class itself only returns it from {@link #getErrorHandler()}.
     *
     * @param eh
     *            The handler to store; may be <code>null</code>.
     */
    @Override
    public void setErrorHandler(final ErrorHandler eh)
    {
        errorHandler = eh;
    }
}