/** * Copyright (C) 2009 Orbeon, Inc. * * This program is free software; you can redistribute it and/or modify it under the terms of the * GNU Lesser General Public License as published by the Free Software Foundation; either version * 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * The full text of the license is available at http://www.gnu.org/copyleft/lesser.html */ package org.orbeon.oxf.processor.pipeline; import org.orbeon.dom.Document; import org.orbeon.dom.Element; import org.orbeon.dom.Node; import org.orbeon.oxf.common.ValidationException; import org.orbeon.oxf.pipeline.api.PipelineContext; import org.orbeon.oxf.processor.ProcessorImpl; import org.orbeon.oxf.processor.ProcessorInputOutputInfo; import org.orbeon.oxf.processor.XMLProcessorRegistry; import org.orbeon.oxf.processor.pipeline.ast.*; import org.orbeon.oxf.processor.pipeline.foreach.AbstractForEachProcessor; import org.orbeon.oxf.xml.XPathUtils; import org.orbeon.oxf.xml.dom4j.Dom4jUtils; import org.orbeon.oxf.xml.dom4j.LocationData; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; public class PipelineReader extends ProcessorImpl { private static final Map<String, String> PREFIXES = new HashMap<String, String>(); private static Pattern IDENTIFIER; private static Pattern END; private static Pattern ID_REFERENCE; private static Pattern FUNCTION_CALL; private static Pattern ROOT_ELEMENT_WITHOUT_NS; private static Pattern ROOT_ELEMENT_WITH_NS; private static Pattern FUNCTION_END; private static Pattern FUNCTION_PARAMETER; private static Pattern URL; private static Pattern XPOINTER; static { PREFIXES.put("p", PipelineProcessor.PIPELINE_NAMESPACE_URI); // Used for regexp pattern initialization String SEPARATOR_REGEXP = "[ \\t]*"; String IDENTIFIER_REGEXP = "[_A-Za-z][_A-Za-z\\-0-9.]*"; // Initialize regexp patterns IDENTIFIER = Pattern.compile("^" + IDENTIFIER_REGEXP + "$"); END = Pattern.compile("^" + SEPARATOR_REGEXP + "$"); ID_REFERENCE = Pattern.compile("^" + SEPARATOR_REGEXP + "#(" + IDENTIFIER_REGEXP + ")"); FUNCTION_CALL = Pattern.compile("^" + SEPARATOR_REGEXP + "(" + IDENTIFIER_REGEXP + ")" + SEPARATOR_REGEXP + "\\("); ROOT_ELEMENT_WITHOUT_NS = Pattern.compile("^" + SEPARATOR_REGEXP + "'(" + IDENTIFIER_REGEXP + ")'"); ROOT_ELEMENT_WITH_NS = Pattern.compile("^" + SEPARATOR_REGEXP + "'(" + IDENTIFIER_REGEXP + ":" + IDENTIFIER_REGEXP + ")'"); FUNCTION_END = Pattern.compile("^" + SEPARATOR_REGEXP + "\\)"); FUNCTION_PARAMETER = Pattern.compile("^" + SEPARATOR_REGEXP + ","); URL = Pattern.compile("^" + SEPARATOR_REGEXP + "([^ \\t,\\)#]+)"); XPOINTER = Pattern.compile("^#xpointer\\("); } private ASTPipeline pipeline; // TODO Bug no instance variables public PipelineReader() { addInputInfo(new ProcessorInputOutputInfo("pipeline")); } public void start(PipelineContext context) { final Document pipelineDocument = readInputAsOrbeonDom(context, "pipeline"); pipeline = readPipeline(pipelineDocument, getInputValidity(context, getInputByName("pipeline"))); } public static ASTPipeline readPipeline(Document pipelineDocument, Object validity) { final Element rootElement = pipelineDocument.getRootElement(); final ASTPipeline ast = new ASTPipeline(); ast.setValidity(validity); List<ASTParam> params = new ArrayList<ASTParam>(); // Read params for (final Iterator i = XPathUtils.selectNodeIterator(rootElement, "p:param", PREFIXES); i.hasNext();) { final Element paramElement = (Element) i.next(); final ASTParam param = new ASTParam(); param.setNode(paramElement); param.setName(paramElement.attributeValue("name")); param.setType(paramElement.attributeValue("type").equals("input") ? ASTParam.INPUT : ASTParam.OUTPUT); param.setDebug(paramElement.attributeValue("debug")); param.setSchemaHref(paramElement.attributeValue("schema-href")); param.setSchemaUri(paramElement.attributeValue("schema-uri")); params.add(param); } ast.setNode(rootElement); ast.getParams().addAll(params); // Read and add all statements ast.getStatements().addAll(readStatements(rootElement)); return ast; } private static List<ASTStatement> readStatements(Element containerElement) { List<ASTStatement> result = new ArrayList<ASTStatement>(); for (Iterator i = containerElement.elementIterator(); i.hasNext();) { final Element element = (Element) i.next(); if (element.getName().equals("processor")) { // Processor ASTProcessorCall processorCall = new ASTProcessorCall(XMLProcessorRegistry.extractProcessorQName(element)); { result.add(processorCall); processorCall.setNode(element); processorCall.setId(element.attributeValue("id")); // Inputs/outputs for (Iterator j = element.elementIterator(); j.hasNext();) { final Element inputOutputElement = (Element) j.next(); // Read common attributes class readCommonAttributes { readCommonAttributes(ASTInputOutput inputOutput) { inputOutput.setNode(inputOutputElement); inputOutput.setName(inputOutputElement.attributeValue("name")); inputOutput.setSchemaHref(inputOutputElement.attributeValue("schema-href")); inputOutput.setSchemaUri(inputOutputElement.attributeValue("schema-uri")); inputOutput.setDebug(inputOutputElement.attributeValue("debug")); Iterator childrenIterator = inputOutputElement.elementIterator(); if(childrenIterator.hasNext()) inputOutput.setContent((Element) childrenIterator.next()); } } // Read attributes specific to input or output if (inputOutputElement.getName().equals("input")) { ASTInput input = new ASTInput(); { processorCall.addInput(input); input.setHref(readHref(inputOutputElement, inputOutputElement.attributeValue("href"))); input.setTransform(Dom4jUtils.extractAttributeValueQName(inputOutputElement, "transform")); new readCommonAttributes(input); } } else { ASTOutput output = new ASTOutput(); { processorCall.addOutput(output); String id = inputOutputElement.attributeValue("id"); if (id != null) { if (! IDENTIFIER.matcher(id).find()) throw new ValidationException("Invalid identifier '" + id + "' in 'id' attribute", (LocationData) (inputOutputElement).getData()); output.setId(id); } String ref = inputOutputElement.attributeValue("ref"); if (ref != null) { if (! IDENTIFIER.matcher(ref).find()) throw new ValidationException("Invalid identifier '" + ref + "' in 'id' attribute", (LocationData) (inputOutputElement).getData()); output.setRef(ref); } new readCommonAttributes(output); } } } } } else if (element.getName().equals("choose")) { // ASTChoose ASTChoose choose = new ASTChoose(); { result.add(choose); choose.setNode(element); choose.setHref(readHref(element, element.attributeValue("href"))); choose.setSchemaHref(element.attributeValue("schema-href")); choose.setSchemaUri(element.attributeValue("schema-uri")); choose.setDebug(element.attributeValue("debug")); for (Iterator j = element.elementIterator(); j.hasNext();) { final Element whenElement = (Element) j.next(); ASTWhen when = new ASTWhen(); { choose.addWhen(when); when.setNode(whenElement); when.setTest(whenElement.attributeValue("test")); when.getStatements().addAll(readStatements(whenElement)); } } } } else if (element.getName().equals("for-each")) { // ASTForEach ASTForEach forEach = new ASTForEach(); { result.add(forEach); forEach.setNode(element); forEach.setHref(readHref(element, element.attributeValue("href"))); forEach.setSelect(element.attributeValue("select")); forEach.setId(element.attributeValue("id")); forEach.setRef(element.attributeValue("ref")); forEach.setRoot(element.attributeValue("root")); forEach.setInputSchemaHref(element.attributeValue("input-schema-href")); forEach.setInputSchemaUri(element.attributeValue("input-schema-uri")); forEach.setInputDebug(element.attributeValue("input-debug")); forEach.setOutputSchemaHref(element.attributeValue("output-schema-href")); forEach.setOutputSchemaUri(element.attributeValue("output-schema-uri")); forEach.setOutputDebug(element.attributeValue("output-debug")); forEach.getStatements().addAll(readStatements(element)); } } } return result; } private static ASTHref readHref(Node node, String href) { LocationData locationData = (LocationData) ((Element) node).getData(); HrefResult result = readHrefWorker(locationData, href); // Make sure that everything was consumed if (! END.matcher(result.rest).find()) throw new ValidationException("Can't parse \"" + result.rest + "\" in href", locationData); return result.astHref; } private static HrefResult readHrefWorker(LocationData locationData, String href) { HrefResult result = new HrefResult(); Matcher matcher = null; if (href == null) { // href="..." is not always mandatory } else { matcher = ID_REFERENCE.matcher(href); if (matcher.find()) { // Reference to an id String id = matcher.group(1); ASTHrefId hrefId = new ASTHrefId(); hrefId.setId(id); result.astHref = hrefId; result.rest = href.substring(matcher.end(0)); } else { matcher = FUNCTION_CALL.matcher(href); if (matcher.find()) { // Try to parse this as a "function call" // Parse function name ("aggregate") final String functionName = matcher.group(1); if ("aggregate".equals(functionName)) { href = href.substring(matcher.end(0)); // Parse first argument (root element) matcher = ROOT_ELEMENT_WITHOUT_NS.matcher(href); if (!matcher.find()) { matcher = ROOT_ELEMENT_WITH_NS.matcher(href); if (!matcher.find()) throw new ValidationException("Invalid element name in \"" + href + "\"", locationData); } final String rootElementName = matcher.group(1); href = href.substring(matcher.end(0)); // Parse parameters List<ASTHref> hrefParameters = new ArrayList<ASTHref>(); while (true) { matcher = FUNCTION_END.matcher(href); if (matcher.find()) { // We are at the end of the function call ASTHrefAggregate hrefAggregate = new ASTHrefAggregate(); hrefAggregate.setRoot(rootElementName); hrefAggregate.setHrefs(hrefParameters); result.rest = href.substring(matcher.end(0)); result.astHref = hrefAggregate; break; } else { matcher = FUNCTION_PARAMETER.matcher(href); if (matcher.find()) { // We've got an other parameter href = href.substring(matcher.end(0)); HrefResult parameterResult = readHrefWorker(locationData, href); hrefParameters.add(parameterResult.astHref); href = parameterResult.rest; } else { throw new ValidationException("Can't find \")\" or other href parameter in \"" + href + "\"", locationData); } } } } else if ("current".equals(functionName)) { href = href.substring(matcher.end(0)); matcher = FUNCTION_END.matcher(href); if (!matcher.find()) throw new ValidationException("Expected ')' in current() function call", locationData); ASTHrefId hrefId = new ASTHrefId(); hrefId.setId(AbstractForEachProcessor.FOR_EACH_CURRENT_INPUT); result.astHref = hrefId; result.rest = href.substring(matcher.end(0)); } else { throw new ValidationException("Unsupported function \"" + functionName + "\"", locationData); } } else { matcher = URL.matcher(href); if (matcher.find()) { // URL ASTHrefURL hrefURL = new ASTHrefURL(); hrefURL.setURL(matcher.group(1)); result.astHref = hrefURL; result.rest = href.substring(matcher.end(0)); } else { throw new ValidationException("Can't find id, URL or function call in \"" + href + "\"", locationData); } } } } // Handle optional XPointer expression matcher = XPOINTER.matcher(result.rest); if (matcher.find()) { int parenthesisDepth = 0; boolean inString = false; char quoteType = 0; String rest = result.rest.substring(matcher.end(0)); StringBuilder xpath = new StringBuilder(); while(true) { if (inString) { // Look for end of this string int position = rest.indexOf(quoteType); if (position == -1) throw new ValidationException("Unterminated string", locationData); xpath.append(rest.substring(0, position + 1)); rest = rest.substring(position + 1); inString = false; } else { int firstSingleQuote = rest.indexOf('\''); int firstDoubleQuote = rest.indexOf('"'); int firstOpeningParenthesis = rest.indexOf('('); int firstClosingParenthesis = rest.indexOf(')'); if (firstSingleQuote == -1) firstSingleQuote = Integer.MAX_VALUE; if (firstDoubleQuote == -1) firstDoubleQuote = Integer.MAX_VALUE; if (firstOpeningParenthesis == -1) firstOpeningParenthesis = Integer.MAX_VALUE; if (firstClosingParenthesis == -1) firstClosingParenthesis = Integer.MAX_VALUE; if (firstSingleQuote < firstDoubleQuote && firstSingleQuote < firstOpeningParenthesis && firstSingleQuote < firstClosingParenthesis) { // Start single quoted string quoteType = '\''; inString = true; xpath.append(rest.substring(0, firstSingleQuote + 1)); rest = rest.substring(firstSingleQuote + 1); } else if (firstDoubleQuote < firstSingleQuote && firstDoubleQuote < firstOpeningParenthesis && firstDoubleQuote < firstClosingParenthesis) { // Start double quoted string quoteType = '"'; inString = true; xpath.append(rest.substring(0, firstDoubleQuote + 1)); rest = rest.substring(firstDoubleQuote + 1); } else if (firstOpeningParenthesis < firstSingleQuote && firstOpeningParenthesis < firstDoubleQuote && firstOpeningParenthesis < firstClosingParenthesis) { // Opening parenthesis parenthesisDepth++; xpath.append(rest.substring(0, firstOpeningParenthesis + 1)); rest = rest.substring(firstOpeningParenthesis + 1); } else if (firstClosingParenthesis < firstSingleQuote && firstClosingParenthesis < firstDoubleQuote && firstClosingParenthesis < firstOpeningParenthesis) { // Closing parenthesis if (parenthesisDepth == 0) { // We're at the end of the XPointer expression xpath.append(rest.substring(0, firstClosingParenthesis)); ASTHrefXPointer hrefXPointer = new ASTHrefXPointer(); hrefXPointer.setHref(result.astHref); hrefXPointer.setXpath(xpath.toString()); HrefResult xpointerResult = new HrefResult(); xpointerResult.astHref = hrefXPointer; xpointerResult.rest = rest.substring(firstClosingParenthesis + 1); result = xpointerResult; break; } else { parenthesisDepth--; xpath.append(rest.substring(0, firstClosingParenthesis + 1)); rest = rest.substring(firstClosingParenthesis + 1); } } else { throw new ValidationException("Expected single quote, double quote, opening parenthesis " + "or closing parenthesis in XPointer expression: \"" + rest + "\"", locationData); } } } } return result; } private static class HrefResult { public ASTHref astHref; public String rest = ""; } public ASTPipeline getPipeline() { return pipeline; } }