/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cocoon.transformation; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.HashMap; import java.util.Map; import java.util.Properties; import java.util.StringTokenizer; import org.apache.avalon.framework.configuration.Configurable; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.ConfigurationException; import org.apache.avalon.framework.parameters.Parameters; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.components.NekoHtmlSaxParser; import org.apache.cocoon.environment.SourceResolver; import org.apache.cocoon.xml.IncludeXMLConsumer; import org.apache.cocoon.xml.dom.DOMBuilder; import org.apache.excalibur.source.Source; import org.w3c.dom.Document; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * Converts (escaped) HTML snippets into tidied HTML using the NekoHTML library. * This transformer expects a list of elements, passed as comma separated * values of the "tags" parameter. It records the text enclosed in such * elements and pass it thru Neko to obtain valid XHTML. * * @version $Id$ */ public class NekoHTMLTransformer extends AbstractSAXTransformer implements Configurable { /** * Properties for Neko format */ private Properties properties; /** * Tags that must be normalized */ private Map tags; /** * React on endElement calls that contain a tag to be * tidied and run Neko on it, otherwise passthru. * * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) */ public void endElement(String uri, String name, String raw) throws SAXException { if (this.tags.containsKey(name)) { String toBeNormalized = this.endTextRecording(); try { this.normalize(toBeNormalized); } catch (ProcessingException e) { e.printStackTrace(); } } super.endElement(uri, name, raw); } /** * Start buffering text if inside a tag to be normalized, * passthru otherwise. * * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement(String uri, String name, String raw, Attributes attr) throws SAXException { super.startElement(uri, name, raw, attr); if (this.tags.containsKey(name)) { this.startTextRecording(); } } /** * Configure this transformer, possibly passing to it * a jtidy configuration file location. */ public void configure(Configuration config) throws ConfigurationException { super.configure(config); String configUrl = config.getChild("neko-config").getValue(null); if (configUrl != null) { org.apache.excalibur.source.SourceResolver resolver = null; Source configSource = null; try { resolver = (org.apache.excalibur.source.SourceResolver) this.manager.lookup(org.apache.excalibur.source.SourceResolver.ROLE); configSource = resolver.resolveURI(configUrl); if (getLogger().isDebugEnabled()) { getLogger().debug( "Loading configuration from " + configSource.getURI()); } this.properties = new Properties(); this.properties.load(configSource.getInputStream()); } catch (Exception e) { getLogger().warn("Cannot load configuration from " + configUrl); throw new ConfigurationException( "Cannot load configuration from " + configUrl, e); } finally { if (null != resolver) { this.manager.release(resolver); resolver.release(configSource); } } } } /** * The beef: run Neko on the buffered text and stream * the result * * @param text the string to be tidied */ private void normalize(String text) throws ProcessingException { Reader reader = new StringReader(text); try { NekoHtmlSaxParser parser = new NekoHtmlSaxParser(this.properties); DOMBuilder builder = new DOMBuilder(); parser.setContentHandler(builder); parser.parse(new InputSource(reader)); Document doc = builder.getDocument(); IncludeXMLConsumer.includeNode(doc, this.contentHandler, this.lexicalHandler); } catch (Exception e) { throw new ProcessingException( "Exception in NekoHTMLTransformer.normalize()", e); } finally { try { reader.close(); } catch (IOException e) { throw new ProcessingException(e); } } } /** * Setup this component, passing the tag names to be tidied. */ public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par) throws ProcessingException, SAXException, IOException { super.setup(resolver, objectModel, src, par); String tagsParam = par.getParameter("tags", ""); if (getLogger().isDebugEnabled()) { getLogger().debug("tags: " + tagsParam); } this.tags = new HashMap(); StringTokenizer tokenizer = new StringTokenizer(tagsParam, ","); while (tokenizer.hasMoreElements()) { String tok = tokenizer.nextToken().trim(); this.tags.put(tok, tok); } } }