/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-08 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.xquery.modules;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import javax.xml.transform.Source;
import javax.xml.transform.sax.SAXSource;
import org.apache.log4j.Logger;
import org.exist.memtree.DocumentBuilderReceiver;
import org.exist.memtree.DocumentImpl;
import org.exist.memtree.MemTreeBuilder;
import org.exist.memtree.SAXAdapter;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.NodeValue;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* Utility Functions for XQuery Extension Modules
*
* @author Adam Retter <adam@exist-db.org>
* @serial 200805202059
* @version 1.1
*/
public class ModuleUtils {
protected final static Logger LOG = Logger.getLogger(ModuleUtils.class);
/**
* Takes a String of XML and Creates an XML Node from it using SAX in the
* context of the query
*
* @param context
* The Context of the calling XQuery
* @param str
* The String of XML
*
* @return The NodeValue of XML
*/
public static NodeValue stringToXML(XQueryContext context, String str) throws XPathException, SAXException {
Reader reader = new StringReader(str);
try {
return inputSourceToXML(context, new InputSource(reader));
} finally {
try {
reader.close();
} catch(IOException ioe) {
LOG.warn("Unable to close reader: " + ioe.getMessage(), ioe);
}
}
}
/**
* Takes an InputStream of XML and Creates an XML Node from it using SAX in the
* context of the query
*
* @param context
* The Context of the calling XQuery
* @param is
* The InputStream of XML
*
* @return The NodeValue of XML
*/
public static NodeValue streamToXML(XQueryContext context, InputStream is) throws XPathException, SAXException {
return inputSourceToXML(context, new InputSource(is));
}
/**
* Takes a Source of XML and Creates an XML Node from it using SAX in the
* context of the query
*
* @param context
* The Context of the calling XQuery
* @param src
* The Source of XML
*
* @return The NodeValue of XML
*/
public static NodeValue sourceToXML(XQueryContext context, Source src) throws XPathException, SAXException {
InputSource inputSource = SAXSource.sourceToInputSource(src);
if(inputSource == null){
throw new XPathException(src.getClass().getName() + " is unsupported.");
}
return inputSourceToXML(context, inputSource);
}
/**
* Takes a InputSource of XML and Creates an XML Node from it using SAX in the
* context of the query
*
* @param context
* The Context of the calling XQuery
* @param xml
* The InputSource of XML
*
* @return The NodeValue of XML
*/
public static NodeValue inputSourceToXML(XQueryContext context, InputSource inputSource) throws XPathException, SAXException {
context.pushDocumentContext();
XMLReader reader = null;
try {
// try and construct xml document from input stream, we use eXist's
// in-memory DOM implementation
reader = context.getBroker().getBrokerPool().getParserPool().borrowXMLReader();
LOG.debug( "Parsing XML response ..." );
// TODO : we should be able to cope with context.getBaseURI()
MemTreeBuilder builder = context.getDocumentBuilder();
DocumentBuilderReceiver receiver = new DocumentBuilderReceiver( builder, true );
reader.setContentHandler(receiver);
reader.parse(inputSource);
Document doc = receiver.getDocument();
// return (NodeValue)doc.getDocumentElement();
return((NodeValue)doc);
} catch(IOException e) {
throw(new XPathException(e.getMessage()));
} finally {
context.popDocumentContext();
if(reader != null){
context.getBroker().getBrokerPool().getParserPool().returnXMLReader(reader);
}
}
}
/**
* Takes a HTML InputSource and creates an XML representation of the HTML by
* tidying it (uses NekoHTML)
*
* @param context
* The Context of the calling XQuery
* @param srcHtml
* The Source for the HTML
* @param parserFeatures
* The features to set on the Parser
* @param parserProperties
* The properties to set on the Parser
*
* @return An in-memory Document representing the XML'ised HTML
*/
public static DocumentImpl htmlToXHtml(XQueryContext context, String url, Source srcHtml, Map<String, Boolean> parserFeatures, Map<String, String>parserProperties) throws XPathException, SAXException {
InputSource inputSource = SAXSource.sourceToInputSource(srcHtml);
if(inputSource == null){
throw new XPathException(srcHtml.getClass().getName() + " is unsupported.");
}
return htmlToXHtml(context, url, inputSource, parserFeatures, parserProperties);
}
/**
* Takes a HTML InputSource and creates an XML representation of the HTML by
* tidying it (uses NekoHTML)
*
* @param context
* The Context of the calling XQuery
* @param srcHtml
* The InputSource for the HTML
* @param parserFeatures
* The features to set on the Parser
* @param parserProperties
* The properties to set on the Parser
*
* @return An in-memory Document representing the XML'ised HTML
*/
public static DocumentImpl htmlToXHtml(XQueryContext context, String url, InputSource srcHtml, Map<String, Boolean> parserFeatures, Map<String, String>parserProperties) throws XPathException, SAXException {
// we use eXist's in-memory DOM implementation
org.exist.memtree.DocumentImpl memtreeDoc = null;
// use Neko to parse the HTML content to XML
XMLReader reader = null;
try {
LOG.debug("Converting HTML to XML using NekoHTML parser for: " + url);
reader = (XMLReader) Class.forName("org.cyberneko.html.parsers.SAXParser").newInstance();
if(parserFeatures != null) {
for(Entry<String, Boolean> parserFeature : parserFeatures.entrySet()) {
reader.setFeature(parserFeature.getKey(), parserFeature.getValue());
}
}
if(parserProperties == null) {
//default: do not modify the case of elements and attributes
reader.setProperty("http://cyberneko.org/html/properties/names/elems","match");
reader.setProperty("http://cyberneko.org/html/properties/names/attrs","no-change");
} else {
for(Entry<String, String> parserProperty : parserProperties.entrySet()) {
reader.setProperty(parserProperty.getKey(), parserProperty.getValue());
}
}
} catch(Exception e) {
String errorMsg = "Error while invoking NekoHTML parser. ("
+ e.getMessage()
+ "). If you want to parse non-wellformed HTML files, put "
+ "nekohtml.jar into directory 'lib/user'.";
LOG.error(errorMsg, e);
throw new XPathException(errorMsg, e);
}
SAXAdapter adapter = new SAXAdapter();
reader.setContentHandler(adapter);
try {
reader.parse(srcHtml);
} catch (IOException e) {
throw new XPathException(e.getMessage(), e);
}
Document doc = adapter.getDocument();
memtreeDoc = (DocumentImpl) doc;
memtreeDoc.setContext(context);
return memtreeDoc;
}
/**
* Parses a structure like <parameters><param name="a" value="1"/><param
* name="b" value="2"/></parameters> into a set of Properties
*
* @param nParameters
* The parameters Node
* @return a set of name value properties for representing the XML
* parameters
*/
public static Properties parseParameters(Node nParameters)
throws XPathException {
return parseProperties(nParameters, "param");
}
/**
* Parses a structure like <properties><property name="a" value="1"/><property
* name="b" value="2"/></properties> into a set of Properties
*
* @param nProperties
* The properties Node
* @return a set of name value properties for representing the XML
* properties
*/
public static Properties parseProperties(Node nProperties)
throws XPathException {
return parseProperties(nProperties, "property");
}
/**
* Parses a structure like <properties><property name="a" value="1"/><property
* name="b" value="2"/></properties> into a set of Properties
*
* @param container
* The container of the properties
* @param elementName
* The name of the property element
* @return a set of name value properties for representing the XML
* properties
*/
private final static Properties parseProperties(Node container,
String elementName) throws XPathException {
Properties properties = new Properties();
if (container != null && container.getNodeType() == Node.ELEMENT_NODE) {
NodeList params = ((Element) container)
.getElementsByTagName(elementName);
for (int i = 0; i < params.getLength(); i++) {
Element param = ((Element) params.item(i));
String name = param.getAttribute("name");
String value = param.getAttribute("value");
if (name != null && value != null) {
properties.setProperty(name, value);
} else {
LOG.warn("Name or value attribute missing for "
+ elementName);
}
}
}
return properties;
}
}