/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cocoon.components.source; import org.apache.avalon.framework.component.Component; import org.apache.avalon.framework.component.ComponentManager; import org.apache.avalon.framework.logger.AbstractLogEnabled; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.environment.ModifiableSource; import org.apache.cocoon.util.ClassUtils; import org.apache.excalibur.xml.sax.SAXParser; import org.w3c.dom.Document; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.lang.reflect.Method; import java.util.Properties; /** * This abstract class provides convenience methods to implement * a stream based Source. Implement getInputStream(), getSystemId() and * optionally override refresh(), recycle(), getLastModified() and * getContentLength() to obtain a valid Source implementation. * <p> * This base implementation provides services to parse HTML sources * (HTML is not valid XML) using JTidy, if present. The source is * considered to contain HTML if <code>isHTMLContent()</code> returns * true. * * @deprecated Use the new Avalon Excalibur Source Resolving * @author <a href="mailto:sylvain@apache.org">Sylvain Wallez</a> * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a> * @version CVS $Id$ */ public abstract class AbstractStreamSource extends AbstractLogEnabled implements ModifiableSource { /** Is JTidy available? */ private static Class jtidyClass; /** Properties used for converting HTML to XML */ private static Properties xmlProperties; /** The TrAX factory for serializing xml */ public static TransformerFactory transformerFactory = TransformerFactory.newInstance(); /** * Test if JTidy is available */ static { jtidyClass = null; try { jtidyClass = ClassUtils.loadClass("org.w3c.tidy.Tidy"); } catch (ClassNotFoundException cnfe) { // ignore } xmlProperties = new Properties(); xmlProperties.put(OutputKeys.METHOD, "xml"); xmlProperties.put(OutputKeys.OMIT_XML_DECLARATION, "no"); } /** The ComponentManager needed for streaming */ protected ComponentManager manager; /** * Construct a new object */ protected AbstractStreamSource(ComponentManager manager) { this.manager = manager; } /** * Does this source contain HTML ? If true, JTidy will be used (if available) to * parse the input as XML. * <p> * The default here is to return false. Concrete subclasses should override * this if needed. */ protected boolean isHTMLContent() { return false; } /** * Return a new <code>InputSource</code> object */ public InputSource getInputSource() throws IOException, ProcessingException { InputStream stream = this.getInputStream(); if (jtidyClass != null && isHTMLContent()) { try { final Object xhtmlconvert = jtidyClass.newInstance(); Method m = jtidyClass.getMethod("setXmlOut", new Class[] { Class.forName("java.lang.Boolean")}); m.invoke(xhtmlconvert, new Object[] { Boolean.TRUE }); m = jtidyClass.getMethod("setXHTML", new Class[] {Class.forName("java.lang.Boolean")}); m.invoke(xhtmlconvert, new Object[] { Boolean.TRUE }); m = jtidyClass.getMethod("setShowWarnings", new Class[] { Class.forName("java.lang.Boolean")}); m.invoke(xhtmlconvert, new Object[] { Boolean.FALSE }); m = jtidyClass.getMethod("parseDOM", new Class[] { Class.forName("java.io.InputStream"), Class.forName("java.io.OutputStream")}); final Document doc = (Document)m.invoke(xhtmlconvert, new Object[] { stream, null }); final StringWriter writer = new StringWriter(); final Transformer transformer; transformer = transformerFactory.newTransformer(); transformer.setOutputProperties(xmlProperties); transformer.transform(new DOMSource(doc), new StreamResult(writer)); final String xmlstring = writer.toString(); InputSource newObject = new InputSource(new java.io.StringReader(xmlstring)); newObject.setSystemId(this.getSystemId()); return newObject; } catch (Exception ignore) { // Let someone else worry about what we got . This is as before. this.refresh(); stream = this.getInputStream(); } } InputSource newObject = new InputSource(stream); newObject.setSystemId(this.getSystemId()); return newObject; } /** * Stream content to a content handler or to an XMLConsumer. * * @throws SAXException if failed to parse source document. */ public void toSAX(ContentHandler handler) throws SAXException { SAXParser parser = null; try { parser = (SAXParser)this.manager.lookup(SAXParser.ROLE); parser.parse( this.getInputSource(), handler); } catch (SAXException e) { // Preserve original exception throw e; } catch (Exception e){ throw new SAXException("Exception during processing of " + this.getSystemId(), e); } finally { if (parser != null) this.manager.release( (Component)parser); } } /** * Override this method to set the Content Length * */ public long getContentLength() { return -1; } /** * Override this method to set the Last Modification date * */ public long getLastModified() { return 0; } /** * Returns <code>true</code> if <code>getInputStream()</code> succeeds. * Subclasses can provide a more efficient implementation. */ public boolean exists() { try { InputStream stream = getInputStream(); stream.close(); return true; } catch(Exception e) { return false; } } /** * To be overriden in concrete subclasses if needed. */ public void recycle() { } /** * To be overriden in concrete subclasses if needed. */ public void refresh() { } }