/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.camel.support; import java.io.Closeable; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.camel.Exchange; import org.apache.camel.InvalidPayloadException; import org.apache.camel.language.simple.SimpleLanguage; import org.apache.camel.util.IOHelper; import org.apache.camel.util.ObjectHelper; /** * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token, * where the end token corresponds implicitly to either the end tag or the self-closing start tag. * <p/> * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream * to access the message body. * <p/> * Can be used to split big XML files. * <p/> * This implementation supports inheriting namespaces from a parent/root tag. */ public class TokenXMLExpressionIterator extends ExpressionAdapter { private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")"); private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)"; private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>"; private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>"; private static final String OPTION_WRAP_TOKEN = "<*>"; protected final String tagToken; protected final String inheritNamespaceToken; public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) { ObjectHelper.notEmpty(tagToken, "tagToken"); this.tagToken = tagToken; // namespace token is optional this.inheritNamespaceToken = inheritNamespaceToken; } protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) { String tag = tagToken; if (SimpleLanguage.hasSimpleFunction(tag)) { tag = SimpleLanguage.expression(tag).evaluate(exchange, String.class); } String inherit = inheritNamespaceToken; if (inherit != null && SimpleLanguage.hasSimpleFunction(inherit)) { inherit = SimpleLanguage.expression(inherit).evaluate(exchange, String.class); } // must be XML tokens if (!tag.startsWith("<")) { tag = "<" + tag; } if (!tag.endsWith(">")) { tag = tag + ">"; } if (inherit != null) { if (!inherit.startsWith("<")) { inherit = "<" + inherit; } if (!inherit.endsWith(">")) { inherit = inherit + ">"; } } // must be XML tokens if (!tag.startsWith("<") || !tag.endsWith(">")) { throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tag); } if (inherit != null && (!inherit.startsWith("<") || !inherit.endsWith(">"))) { throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inherit); } XMLTokenIterator iterator = new XMLTokenIterator(tag, inherit, in, charset); iterator.init(); return iterator; } @Override public boolean matches(Exchange exchange) { // as a predicate we must close the stream, as we do not return an iterator that can be used // afterwards to iterate the input stream Object value = doEvaluate(exchange, true); return ObjectHelper.evaluateValuePredicate(value); } @Override public Object evaluate(Exchange exchange) { // as we return an iterator to access the input stream, we should not close it return doEvaluate(exchange, false); } /** * Strategy to evaluate the exchange * * @param exchange the exchange * @param closeStream whether to close the stream before returning from this method. * @return the evaluated value */ protected Object doEvaluate(Exchange exchange, boolean closeStream) { InputStream in = null; try { in = exchange.getIn().getMandatoryBody(InputStream.class); // we may read from a file, and want to support custom charset defined on the exchange String charset = IOHelper.getCharsetName(exchange); return createIterator(exchange, in, charset); } catch (InvalidPayloadException e) { exchange.setException(e); // must close input stream IOHelper.close(in); return null; } finally { if (closeStream) { IOHelper.close(in); } } } /** * Iterator to walk the input stream */ static class XMLTokenIterator implements Iterator<Object>, Closeable { final String tagToken; final InputStream in; final String charset; Scanner scanner; Object image; private final Pattern tagTokenPattern; private final String inheritNamespaceToken; private final boolean wrapToken; private Pattern inheritNamespaceTokenPattern; private String rootTokenNamespaces; private String wrapHead; private String wrapTail; XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) { this.tagToken = tagToken; this.charset = charset; // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns this.tagTokenPattern = Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), Pattern.MULTILINE | Pattern.DOTALL); this.inheritNamespaceToken = inheritNamespaceToken; if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) { this.wrapToken = true; this.in = new RecordableInputStream(in, charset); } else { this.wrapToken = false; this.in = in; if (inheritNamespaceToken != null) { // the inherit namespace token may itself have a namespace prefix // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines this.inheritNamespaceTokenPattern = Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE, SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), Pattern.MULTILINE | Pattern.DOTALL); } } } void init() { // use a scanner with the default delimiter this.scanner = new Scanner(in, charset); this.image = scanner.hasNext() ? (String) next(true) : null; } String getNext(boolean first) { // initialize inherited namespaces on first if (first && inheritNamespaceToken != null && !wrapToken) { rootTokenNamespaces = getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0)); } String next = scanner.findWithinHorizon(tagTokenPattern, 0); if (next == null) { return null; } if (first && wrapToken) { MatchResult mres = scanner.match(); wrapHead = ((RecordableInputStream)in).getText(mres.start()); wrapTail = buildXMLTail(wrapHead); } // build answer accordingly to whether namespaces should be inherited or not if (inheritNamespaceToken != null && rootTokenNamespaces != null) { // REVISIT should skip the prefixes that are declared within the child itself. String head = ObjectHelper.before(next, ">"); boolean empty = false; if (head.endsWith("/")) { head = head.substring(0, head.length() - 1); empty = true; } StringBuilder sb = new StringBuilder(); // append root namespaces to local start token // grab the text String tail = ObjectHelper.after(next, ">"); // build result with inherited namespaces next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString(); } else if (wrapToken) { // wrap the token StringBuilder sb = new StringBuilder(); next = sb.append(wrapHead).append(next).append(wrapTail).toString(); } return next; } private String getNamespacesFromNamespaceToken(String text) { if (text == null) { return null; } // find namespaces (there can be attributes mixed, so we should only grab the namespaces) Map<String, String> namespaces = new LinkedHashMap<String, String>(); Matcher matcher = NAMESPACE_PATTERN.matcher(text); while (matcher.find()) { String prefix = matcher.group(1); String url = matcher.group(2); if (ObjectHelper.isEmpty(prefix)) { prefix = "_DEFAULT_"; } else { // skip leading : prefix = prefix.substring(1); } namespaces.put(prefix, url); } // did we find any namespaces if (namespaces.isEmpty()) { return null; } // build namespace String StringBuilder sb = new StringBuilder(); for (Map.Entry<String, String> entry : namespaces.entrySet()) { String key = entry.getKey(); // note the value is already quoted String value = entry.getValue(); if ("_DEFAULT_".equals(key)) { sb.append(" xmlns=").append(value); } else { sb.append(" xmlns:").append(key).append("=").append(value); } } return sb.toString(); } @Override public boolean hasNext() { return image != null; } @Override public Object next() { return next(false); } Object next(boolean first) { Object answer = image; // calculate next if (scanner.hasNext()) { image = getNext(first); } else { image = null; } if (answer == null) { // first time the image may be null answer = image; } return answer; } @Override public void remove() { // noop } @Override public void close() throws IOException { scanner.close(); } } private static String buildXMLTail(String xmlhead) { // assume the input text is a portion of a well-formed xml List<String> tags = new ArrayList<String>(); int p = 0; while (p < xmlhead.length()) { p = xmlhead.indexOf('<', p); if (p < 0) { break; } int nc = xmlhead.charAt(p + 1); if (nc == '?') { p++; continue; } else if (nc == '/') { p++; tags.remove(tags.size() - 1); } else { final int ep = xmlhead.indexOf('>', p); if (xmlhead.charAt(ep - 1) == '/') { p++; continue; } final int sp = xmlhead.substring(p, ep).indexOf(' '); tags.add(xmlhead.substring(p + 1, sp > 0 ? p + sp : ep)); p = ep; } } StringBuilder sb = new StringBuilder(); for (int i = tags.size() - 1; i >= 0; i--) { sb.append("</").append(tags.get(i)).append(">"); } return sb.toString(); } }