/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.util.xsltfilter; import java.net.*; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.servlet.ServletContext; import javax.servlet.http.HttpServletRequest; import javax.xml.transform.*; import javax.xml.transform.sax.TransformerHandler; import org.carrot2.util.xslt.TemplatesPool; import org.carrot2.util.xslt.TransformerErrorListener; import org.slf4j.Logger; import org.xml.sax.*; import org.xml.sax.ContentHandler; /** * A SAX handler that detects <code>xml-stylesheet</code> directive and delegates SAX * events to a declared transformer. */ final class TransformingDocumentHandler implements ContentHandler { private static final Logger log = org.slf4j.LoggerFactory.getLogger(TransformingDocumentHandler.class); /** * A map of XSLT output methods and their corresponding MIME content types. */ private final static HashMap<String, String> methodMapping; static { methodMapping = new HashMap<String, String>(); methodMapping.put("xml", "application/xml"); methodMapping.put("html", "text/html"); methodMapping.put("text", "text/plain"); } /** * A regular expression for extracting <code>xml-stylesheet</code>'s * <code>type</code> pseudo-attribute. */ private final Pattern typePattern = Pattern.compile( "(type[ \t]*=[ \\t]*\")([^\"]*)(\")", Pattern.CASE_INSENSITIVE); /** * A regular expression for extracting <code>xml-stylesheet</code>'s * <code>href</code> pseudo-attribute. */ private final Pattern hrefPattern = Pattern.compile( "(href[ \\t]*=[ \\t]*\")([^\"]*)(\")", Pattern.CASE_INSENSITIVE); /** * A regular expression for extracting <code>ext-stylesheet</code>'s * <code>resource</code> pseudo-attribute. */ private final Pattern resourcePattern = Pattern.compile( "(resource[ \\t]*=[ \\t]*\")([^\"]*)(\")", Pattern.CASE_INSENSITIVE); /** * Current request for which this handler works. Used for resolving relative * URIs. */ private HttpServletRequest request; /** * The default handler used when no <code>xml-stylesheet</code> directive is * specified in the XML stream. */ private TransformerHandler defaultHandler; /** * The actual content handler (transformer) used for processing the input. */ private TransformerHandler contentHandler; /** * Servlet context for resolving local paths. */ private ServletContext context; /** * A result sink where the transformation output should be redirected. */ private Result result; /** * Transformer error listener. */ private TransformerErrorListener transformerErrorListener = new TransformerErrorListener(); /** * Locator instance used by this handler is also shared with the transformation * handler. */ private Locator locator; /** * A pool of precompiled stylesheets. */ private TemplatesPool pool; /** * */ private IContentTypeListener contentTypeListener; /** * A set of stylesheet parameters, copied from the request context when the * transformation begins. */ private final Map<String, Object> stylesheetParams; /** * Creates a SAX handler with the given base application URL and context path. The * base URL is needed to resolve host-relative stylesheet URIs. Application context * path is used to initialize local streams instead of requesting the stylesheet via * HTTP. */ public TransformingDocumentHandler(HttpServletRequest request, ServletContext context, Map<String, Object> stylesheetParams, TemplatesPool pool) { this.request = request; this.context = context; this.pool = pool; this.stylesheetParams = stylesheetParams; } /** * {@link ContentHandler} implementation. */ public void startDocument() throws SAXException { // Empty. We don't know the actual content handler yet. } /** * {@link ContentHandler} implementation. */ public void characters(final char [] ch, final int start, final int length) throws SAXException { initContentHandler(); contentHandler.characters(ch, start, length); } /** * {@link ContentHandler} implementation. */ public void endDocument() throws SAXException { initContentHandler(); try { contentHandler.endDocument(); } catch (RuntimeException t) { final TransformerException transformerException = transformerErrorListener.exception; if (transformerException != null) { final Throwable cause = transformerException.getCause(); if (cause != null && cause instanceof Exception) { throw new SAXException("XSLT transformation error.", (Exception) cause); } else { throw new SAXException("XSLT transformation error.", transformerException); } } } } /** * {@link ContentHandler} implementation. */ public void endElement(String namespaceURI, String localName, String qName) throws SAXException { initContentHandler(); contentHandler.endElement(namespaceURI, localName, qName); } /** * {@link ContentHandler} implementation. */ public void endPrefixMapping(String prefix) throws SAXException { initContentHandler(); contentHandler.endPrefixMapping(prefix); } /** * {@link ContentHandler} implementation. */ public void ignorableWhitespace(char [] ch, int start, int length) throws SAXException { /* * Pass ignorable whitespace if we have a content handler. Before content handler * initialization simply ignore these calls. We could queue SAX events until * content handler is available, but would it make any sense? */ if (contentHandler != null) { contentHandler.ignorableWhitespace(ch, start, length); } } /** * {@link ContentHandler} implementation. */ public void startPrefixMapping(String prefix, String uri) throws SAXException { this.initContentHandler(); contentHandler.startPrefixMapping(prefix, uri); } /** * {@link ContentHandler} implementation. Detect processing instructions and see if we * have <code>xml-stylesheet</code> anywhere. */ public void processingInstruction(String target, String data) throws SAXException { if (contentHandler == null) { inspectProcessingInstruction(this, target, data); } initContentHandler(); contentHandler.processingInstruction(target, data); } /** * {@link ContentHandler} implementation. */ public void setDocumentLocator(Locator locator) { this.locator = locator; } /** * {@link ContentHandler} implementation. */ public void skippedEntity(String name) throws SAXException { this.initContentHandler(); contentHandler.skippedEntity(name); } /** * {@link ContentHandler} implementation. */ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { this.initContentHandler(); contentHandler.startElement(namespaceURI, localName, qName, atts); } /** * Replaces the default transformer handler with the given one. */ private void setTransformerHandler(TransformerHandler fallbackHandler) throws SAXException { if (contentHandler != null) { throw new SAXException( "Some input has been already processed. Cannot change the handler anymore. " + "Place xml-stylesheet " + "directive immediately at the top of the XML file."); } final Transformer transformer = fallbackHandler.getTransformer(); /* * Pass any stylesheet parameters to the transformer. */ if (stylesheetParams != null) { for (Iterator<Map.Entry<String, Object>> i = stylesheetParams.entrySet() .iterator(); i.hasNext();) { final Map.Entry<String, Object> entry = i.next(); transformer.setParameter((String) entry.getKey(), entry.getValue()); } } this.defaultHandler = fallbackHandler; } /** * Sets a {@link IContentTypeListener} for this transformation. */ public final void setContentTypeListener(IContentTypeListener l) { this.contentTypeListener = l; } /** * This method should be invoked to cleanup after processing is done. */ public final void cleanup() { if (this.defaultHandler != null) { /* * Reset the default handler's transformer. */ this.defaultHandler.getTransformer().reset(); } } /** * Sets the result sink for the xslt transformation. */ public void setTransformationResult(Result result) { this.result = result; } /** * Process <code>xml-stylesheet</code>. */ private URI processXmlStylesheet(String target, String data) { if (!target.equals("xml-stylesheet")) return null; /* * Break up pseudo-attributes and look for content-type */ final Matcher typeMatcher = typePattern.matcher(data); if (!typeMatcher.find()) { log.warn("xml-stylesheet directive with no type attribute (should be text/xsl)."); return null; } final String type = typeMatcher.group(2); if (!"text/xsl".equals(type)) { log.warn("xml-stylesheet directive with incorrect type (should be text/xsl): " + type); return null; } final Matcher hrefMatcher = hrefPattern.matcher(data); if (!hrefMatcher.find()) { log.warn("xml-stylesheet directive with no 'href' pseudo-attribute."); return null; } URI base = URI.create(request.getRequestURI()); String stylesheetURI = hrefMatcher.group(2); return base.resolve(stylesheetURI); } /** * Process <code>ext-stylesheet</code> of the following form: * <pre> * <?ext-stylesheet resource="webapp-resource" ?> * </pre> * where <code>webapp-resource</code> is an application-context relative resource. */ private URI processExtStylesheet(String target, String data) { if (!target.equals("ext-stylesheet")) return null; final Matcher resourceMatcher = resourcePattern.matcher(data); if (!resourceMatcher.find()) { log.warn("ext-stylesheet directive with no 'resource' attribute."); return null; } final String stylesheetURI = resourceMatcher.group(2); try { final URL stylesheetURL = context.getResource(stylesheetURI); return stylesheetURL == null ? null : stylesheetURL.toURI(); } catch (MalformedURLException e) { log.error("Malformed stylesheet URL: " + stylesheetURI, e); } catch (URISyntaxException e) { log.error("Stylesheet URI conversion error: " + stylesheetURI, e); } return null; } /** * Inspect a processing instruction looking for <code>xml-stylesheet</code> * or <code>ext-stylesheet</code> directives. If found, update the * {@link TransformingDocumentHandler#setTransformerHandler(TransformerHandler)} * appropriately. */ public void inspectProcessingInstruction(TransformingDocumentHandler handler, String target, String data) throws SAXException { URI uri; if ((uri = processExtStylesheet(target, data)) != null) { log.debug("Resolved ext-stylesheet URI: " + uri.toString()); } else if ((uri = processXmlStylesheet(target, data)) != null) { log.debug("Resolved xml-stylesheet URI: " + uri.toString()); } else { // Skip unknown processing instructions. return; } /* * Check the pool for precompiled cached Templates */ final String uriString = uri.toString(); Templates template; try { template = pool.getTemplate(uriString); if (template == null) { template = pool.compileTemplate(uriString); pool.addTemplate(uriString, template); } // Find out about the content type and encoding. if (contentTypeListener != null) { final Properties outputProps = template.getOutputProperties(); final String encoding; /* * If you're tempted to use Properties@containsKey, see * http://issues.carrot2.org/browse/CARROT-507 */ String contentType = null; if (hasKey(outputProps, OutputKeys.MEDIA_TYPE)) { contentType = outputProps.getProperty(OutputKeys.MEDIA_TYPE); } else if (hasKey(outputProps, OutputKeys.METHOD)) { final String method = outputProps.getProperty(OutputKeys.METHOD); contentType = (String) methodMapping.get(method); } if (contentType == null) { // Default content type. contentType = (String) methodMapping.get("xml"); } if (hasKey(outputProps, OutputKeys.ENCODING)) { encoding = outputProps.getProperty(OutputKeys.ENCODING); } else { encoding = "UTF-8"; } contentTypeListener.setContentType(contentType, encoding); } final TransformerHandler tHandler = pool.newTransformerHandler(template); tHandler.getTransformer().setErrorListener(transformerErrorListener); handler.setTransformerHandler(tHandler); } catch (TransformerConfigurationException e) { log.error("Transformer configuration exception.", e); } } /** * Properties by default extend from HashMap, but can contain a backup set * of keys as set in {@link Properties#Properties(Properties)}. Unfortunately, * while {@link Properties#getProperty(String)} works with these default * values, {@link Properties#containsKey(Object)} does not. In this method * we check for the existence of a key by trying to load it. */ private static boolean hasKey(Properties props, String key) { return props.getProperty(key) != null; } /** * Initializes the content handler because content is about to be sent to the result. * If no content handler is available, throws an exception. */ private final void initContentHandler() throws SAXException { if (contentHandler == null) { if (defaultHandler == null) { log.info("Stylesheet not specified, using identity handler."); try { this.defaultHandler = this.pool.getIdentityTransformerHandler(); } catch (TransformerConfigurationException e) { throw new RuntimeException("Could not create identity handler."); } if (contentTypeListener != null) { contentTypeListener.setContentType((String) methodMapping.get("xml"), null); } } log.debug("XSLT transformation using handler: " + defaultHandler.getClass().getName()); this.contentHandler = defaultHandler; this.contentHandler.setResult(result); this.contentHandler.startDocument(); if (locator != null) { this.contentHandler.setDocumentLocator(locator); } } if (transformerErrorListener.exception != null) { throw new SAXException("XSLT transformation error.", transformerErrorListener.exception); } } }