/* * This file is part of "SnipSnap Radeox Rendering Engine". * * Copyright (c) 2002 Stephan J. Schmidt, Matthias L. Jugel * All Rights Reserved. * * Please visit http://radeox.org/ for updates and contact. * * --LICENSE NOTICE-- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * --LICENSE NOTICE-- */ package org.radeox.filter; import java.io.ByteArrayOutputStream; import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Stack; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.xml.serializer.ToXMLStream; import org.radeox.api.engine.context.InitialRenderContext; import org.radeox.filter.context.FilterContext; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.AttributesImpl; /* * The paragraph filter finds any text between two empty lines and inserts a * <p/> @author stephan @team sonicteam * * @version $Id: ParagraphFilter.java 4158 2005-11-25 23:25:19Z * ian@caret.cam.ac.uk $ */ public class XHTMLFilter implements Filter, CacheFilter { private static Log log = LogFactory.getLog(XHTMLFilter.class); private static SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); private static final Map blockElements = new HashMap(); static { saxParserFactory.setNamespaceAware(true); List l = new ArrayList(); l.add("p"); blockElements.put("hr", l); // hr cant be nested inside p blockElements.put("h1", l); blockElements.put("h2", l); blockElements.put("h3", l); blockElements.put("h4", l); blockElements.put("h5", l); blockElements.put("h6", l); blockElements.put("h7", l); blockElements.put("ul", l); blockElements.put("ol", l); blockElements.put("div", l); blockElements.put("blockquote", l); } private static HashMap emptyTag = new HashMap(); static { // inclusion els emptyTag.put("img", "img"); emptyTag.put("area", "area"); emptyTag.put("frame", "frame"); // non-standard inclusion els emptyTag.put("layer", "layer"); emptyTag.put("embed", "embed"); // form el emptyTag.put("input", "input"); // default els emptyTag.put("base", "base"); // styling els emptyTag.put("col", "col"); emptyTag.put("basefont", "basefont"); // hidden els emptyTag.put("link", "link"); emptyTag.put("meta", "meta"); // separator els emptyTag.put("br", "br"); emptyTag.put("hr", "hr"); // here because our current p implementation is broken // emptyTag.put("p", "p"); } private static HashMap ignoreEmpty = new HashMap(); static { ignoreEmpty.put("p", "p"); } private InitialRenderContext initialContext; public String filter(String input, FilterContext context) { String finalOutput = input; try { DeblockFilter dbf = new DeblockFilter(); EmptyFilter epf = new EmptyFilter(); dbf.setBlockElements(blockElements); ByteArrayOutputStream baos = new ByteArrayOutputStream(); SpecialXHTMLSerializer xser = new SpecialXHTMLSerializer(); xser.setOutputStream(baos); xser.setIndent(false); xser.setEncoding("UTF-8"); xser.setIndentAmount(4); dbf.setContentHandler(epf); epf.setContentHander(xser.asContentHandler()); SAXParser parser = saxParserFactory.newSAXParser(); XMLReader xmlr = parser.getXMLReader(); xmlr.setContentHandler(dbf); // log.warn("Input is "+input); xmlr.parse(new InputSource(new StringReader("<sr>" + input + "</sr>"))); String output = new String(baos.toByteArray(), "UTF-8"); int startBlock = output.indexOf("<sr>"); int endBlock = output.indexOf("</sr>"); finalOutput = output.substring(startBlock + 4, endBlock); // log.warn("Output is "+finalOutput); } catch (Throwable t) { log.error("Failed to XHTML check " + t.getMessage() + "\n Input======\n" + input + "\n======="); return input; } return finalOutput; } public String[] replaces() { return FilterPipe.NO_REPLACES; } public String[] before() { return FilterPipe.EMPTY_BEFORE; } public void setInitialContext(InitialRenderContext context) { initialContext = context; } public String getDescription() { return "Hand Coded XHTML filter"; } public class DeblockFilter implements ContentHandler { private Stack s = new Stack(); private ContentHandler ch; private Map blockElements = new HashMap(); public void setContentHandler(ContentHandler ch) { this.ch = ch; } public void setBlockElements(Map blockElements) { this.blockElements = blockElements; } public void addElement(String blockElement, String unnested) { List l = (List) blockElements.get(blockElement); if (l == null) { l = new ArrayList(); blockElements.put(blockElement, l); } l.add(unnested); } /** * Unwind the xpath stack back to the first instance of the requested * emement * * @param deblockElement */ private Stack closeTo(List deblockElements) throws SAXException { int firstIndex = s.size(); for (int i = 0; i < s.size(); i++) { EStack es = (EStack) s.get(i); if (deblockElements.contains(es.lname)) { firstIndex = i; } } EStack es = null; Stack sb = new Stack(); while (s.size() > firstIndex) { es = (EStack) s.pop(); // log.warn("Closing "+es.qname); ch.endElement(es.ns, es.qname, es.lname); sb.push(es); } // log.warn("End Close"); return sb; } /** * Check each element to see if its in a list of elements which is * should not be inside If it is one of these elements, get a list of * elements, and unwind to that it is not inside the stack * * @{inheritDoc} */ public void startElement(String ns, String qname, String lname, Attributes atts) throws SAXException { if (blockElements.get(lname) != null) { s.push(new EStack(ns, qname, lname, atts, closeTo((List) blockElements.get(lname)))); } else { s.push(new EStack(ns, qname, lname, atts, null)); } ch.startElement(ns, qname, lname, atts); } /** * When we get to the end element, pop the Stack element off the stack. * If there is arestore path, restore the path back in place by emitting * start elements * * @{inheritDoc} */ public void endElement(String arg0, String arg1, String arg2) throws SAXException { ch.endElement(arg0, arg1, arg2); EStack es = (EStack) s.pop(); if (es.restore != null) { while (es.restore.size() > 0) { EStack esr = (EStack) es.restore.pop(); // log.warn("Restore "+esr.lname); ch.startElement(esr.ns, esr.qname, esr.lname, esr.atts); s.push(esr); } } } public void characters(char[] arg0, int arg1, int arg2) throws SAXException { ch.characters(arg0, arg1, arg2); } public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { ch.ignorableWhitespace(arg0, arg1, arg2); } public void processingInstruction(String arg0, String arg1) throws SAXException { ch.processingInstruction(arg0, arg1); } public void skippedEntity(String arg0) throws SAXException { ch.skippedEntity(arg0); } public void setDocumentLocator(Locator arg0) { ch.setDocumentLocator(arg0); } public void startDocument() throws SAXException { ch.startDocument(); } public void endDocument() throws SAXException { ch.endDocument(); } public void startPrefixMapping(String arg0, String arg1) throws SAXException { ch.startPrefixMapping(arg0, arg1); } public void endPrefixMapping(String arg0) throws SAXException { ch.endPrefixMapping(arg0); } } public class EmptyFilter implements ContentHandler { private ContentHandler next = null; private EStack lastElement = null; public EmptyFilter() { } public void setContentHander(ContentHandler handler) { next = handler; } public void setDocumentLocator(Locator arg0) { next.setDocumentLocator(arg0); } public void startDocument() throws SAXException { emitLast(); next.startDocument(); } public void endDocument() throws SAXException { emitLast(); next.endDocument(); } public void startPrefixMapping(String arg0, String arg1) throws SAXException { emitLast(); next.startPrefixMapping(arg0, arg1); } public void endPrefixMapping(String arg0) throws SAXException { emitLast(); next.endPrefixMapping(arg0); } public void emitLast() throws SAXException { if (lastElement != null) { // this means that there was a startElement, startElement, // so the lastElement MUST be emited next.startElement(lastElement.ns, lastElement.qname, lastElement.lname, lastElement.atts); lastElement = null; } } public void startElement(String ns, String qname, String lname, Attributes atts) throws SAXException { emitLast(); if (ignoreEmpty.get(lname.toLowerCase()) != null) { lastElement = new EStack(ns, qname, lname, atts, null); } else { next.startElement(ns, qname, lname, atts); } } public void endElement(String arg0, String arg1, String arg2) throws SAXException { if (lastElement != null) { // there was a start, then an end with nothing in between // so ignore alltogether lastElement = null; } else { next.endElement(arg0, arg1, arg2); } } public void characters(char[] arg0, int arg1, int arg2) throws SAXException { emitLast(); next.characters(arg0, arg1, arg2); } public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { emitLast(); next.ignorableWhitespace(arg0, arg1, arg2); } public void processingInstruction(String arg0, String arg1) throws SAXException { emitLast(); next.processingInstruction(arg0, arg1); } public void skippedEntity(String arg0) throws SAXException { emitLast(); next.skippedEntity(arg0); } } public class EStack { public EStack(String ns, String qname, String lname, Attributes atts, Stack restore) { this.ns = ns; this.qname = qname; this.lname = lname; this.atts = new AttributesImpl(atts); this.restore = restore; } public EStack(EStack es) { this.ns = es.ns; this.qname = es.qname; this.lname = es.lname; this.atts = new AttributesImpl(es.atts); this.restore = es.restore; } Stack restore = null; String ns; String qname; String lname; Attributes atts; } /** * @author andrew */ public class SpecialXHTMLSerializer extends ToXMLStream { private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; public void endElement(String namespaceURI, String localName, String name) throws SAXException { if ((namespaceURI != null && !"".equals(namespaceURI) && !namespaceURI .equals(XHTML_NAMESPACE)) || emptyTag.containsKey(localName.toLowerCase())) { super.endElement(namespaceURI, localName, name); return; } this.characters(""); super.endElement(namespaceURI, localName, name); } } }