/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cocoon.transformation; import java.io.IOException; import java.io.Serializable; import java.util.Map; import java.util.LinkedList; import java.util.HashSet; import java.util.Set; import java.util.StringTokenizer; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.caching.CacheableProcessingComponent; import org.apache.cocoon.environment.SourceResolver; import org.apache.cocoon.transformation.AbstractSAXTransformer; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.ConfigurationException; import org.apache.avalon.framework.parameters.Parameters; import org.apache.excalibur.source.SourceValidity; import org.apache.excalibur.source.impl.validity.NOPValidity; /** * Cleanup transformer: Removes excess whitespace while adding some where needed * for legibility. Strips unwanted namespace declarations. * * <p>The cleanup transformer can be used for basically any document as-is or customized by * schema (inline vs. block elements) for easier reading.</p> * * <p>Transformer declaration: * <map:components> * <map:transformers> * <map:transformer name="htmlcleanup" * src="org.apache.cocoon.transformation.CleanupTransformer"> * <preserve-uri>*</preserve-uri> * </map:transformer> * * <map:transformer name="xhtmlcleanup" * src="org.apache.cocoon.transformation.CleanupTransformer"> * <inline-elements>a,abbr,acronym,b,br,font,i,u,img</inline-elements> * <preserve-uri>http://www.w3.org/1999/xhtml</preserve-uri> * </map:transformer> * </map:transformers> * </map:components> * </p> * * <p>The "inline-elements" configuration element refers to a list of element names that are * <strong>not</strong> to be indented. The "preserve-uri" configuration element specifies a * namespace uri mapping that is meant for output. All other namespace declarations are * stripped from the output. The "preserve-uri" element may appear more than once. If * "preserve-uri" is omitted, all namespaces/prefixes are removed from the output.</p> * * <p>Transformer usage: * <transform type="xhtmlcleanup"> * <map:parameter name="indent-size" value="4"/> * </transform> * </p> * * <p>The optional parameter "indent-size" specifies the number of additional space characters * appearing at each level of the output document. The default value is 2.</p> * * <p>Bugs: Nested namespace declarations with the same namespace prefix will break the code.</p> * * @author Miles Elam */ public class CleanupTransformer extends AbstractSAXTransformer implements CacheableProcessingComponent { private static final char[] INDENT = ("\n" + " " + " " ).toCharArray(); private static final int MAX_INDENT = CleanupTransformer.INDENT.length - 1; private boolean allowAllURIs = false; private Set allowedURIs = new HashSet(); private Set inlineElements = new HashSet(); private LinkedList uriPrefixes = new LinkedList(); private int indentSize = 2; private int numIndents = 0; private String lastElement; /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#configure(org.apache.avalon.framework.configuration.Configuration) */ public void configure(Configuration conf) throws ConfigurationException { StringTokenizer st; Configuration inlineEltChild = conf.getChild("inline-elements"); st = new StringTokenizer(inlineEltChild.getValue(""), ","); this.inlineElements.clear(); while (st.hasMoreTokens()) { String nextElement = st.nextToken().trim(); if (nextElement.length() > 0) { this.inlineElements.add(nextElement); } } this.allowAllURIs = false; Configuration[] uriChildren = conf.getChildren("preserve-uri"); for (int i=0; i<uriChildren.length; ++i) { String nextChild = uriChildren[i].getValue("").trim(); if (nextChild.length() == 0) { continue; } else if (nextChild.equals("*")) { this.allowAllURIs = true; break; } this.allowedURIs.add(nextChild); } } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#setup(org.apache.cocoon.environment.SourceResolver, java.util.Map, java.lang.String, org.apache.avalon.framework.parameters.Parameters) */ public void setup (SourceResolver resolver, Map objectModel, String src, Parameters par) throws ProcessingException, SAXException, IOException { super.setup(resolver, objectModel, src, par); this.indentSize = par.getParameterAsInteger("indent-size", 2); } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#recycle() */ public void recycle () { super.recycle(); this.numIndents = 0; this.lastElement = null; this.uriPrefixes.clear(); } /** * @see org.apache.cocoon.caching.CacheableProcessingComponent#getKey() */ public Serializable getKey () { return Integer.toString(this.indentSize); } /** * @see org.apache.cocoon.caching.CacheableProcessingComponent#getValidity() */ public SourceValidity getValidity () { return NOPValidity.SHARED_INSTANCE; } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#startPrefixMapping(java.lang.String, java.lang.String) */ public void startPrefixMapping (String prefix, String uri) throws SAXException { if (this.allowAllURIs) { this.contentHandler.startPrefixMapping(prefix, uri); } else if (this.allowedURIs.contains(uri)) { this.contentHandler.startPrefixMapping(prefix, uri); uriPrefixes.add(prefix); } } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#endPrefixMapping(java.lang.String) */ public void endPrefixMapping (String prefix) throws SAXException { if (this.allowAllURIs) { this.contentHandler.endPrefixMapping(prefix); } else if (!uriPrefixes.isEmpty()) { if (uriPrefixes.getLast().toString().equals(prefix)) { this.contentHandler.endPrefixMapping(prefix); uriPrefixes.removeLast(); } } } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement (String uri, String qName, String lName, Attributes attrs) throws SAXException { if (!inlineElements.contains(qName)) { int indentSize = (this.indentSize * this.numIndents) % MAX_INDENT; this.contentHandler.ignorableWhitespace(INDENT, 0, indentSize + 1); ++this.numIndents; this.lastElement = qName; } this.contentHandler.startElement(uri, qName, lName, attrs); } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#endElement(java.lang.String, java.lang.String, java.lang.String) */ public void endElement (String uri, String qName, String lName) throws SAXException { if (!inlineElements.contains(qName)) { --this.numIndents; if (this.lastElement == null || !this.lastElement.equals(qName)) { int indentSize = (this.indentSize * this.numIndents) % MAX_INDENT; this.contentHandler.ignorableWhitespace(INDENT, 0, indentSize + 1); } this.lastElement = null; } this.contentHandler.endElement(uri, qName, lName); } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#characters(char[], int, int) */ public void characters (char[] ch, int start, int length) throws SAXException { int end = start + length; for (int i=start; i<end; ++i) { if (!Character.isWhitespace(ch[i])) { this.contentHandler.characters(ch, start, length); return; } } this.contentHandler.characters(INDENT, 1, 1); } /** * @see org.apache.cocoon.transformation.AbstractSAXTransformer#ignorableWhitespace(char[], int, int) */ public void ignorableWhitespace (char[] ch, int start, int length) throws SAXException { // Do nothing } }