/** * Copyright 2011 meltmedia * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.xchain.framework.util; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.xchain.framework.util.RegExUtil.compilePattern; /** * Utility class for performing parsing operations. * * @author Devon Tackett * @author Christian Trimble * @author Jason Rose * @author Josh Kennedy */ public class ParserUtil { private static Logger log = LoggerFactory.getLogger(ParserUtil.class); static Pattern attributesBoundaryPattern; static Pattern featuresBoundaryPattern; static Pattern parametersBoundaryPattern; static Pattern outputPropertiesBoundaryPattern; static Pattern attributePattern; static Pattern whitespacePattern; static Pattern remainderOfString; static Pattern featurePattern; private static final Map<String, String> xmlEntityMap = new HashMap<String, String>(); private static Pattern xmlEscapePattern; static { attributePattern = compilePattern("\\s*([^\\s=]+)\\s*=\\s*(\"|\')((?:(?!\\2).)*)\\2", log, "Could not compile attribute pattern."); featurePattern = compilePattern("\\s*([^\\s=]+)\\s*=\\s*(\"|\')\\s*(true|false)\\s*\\2", log, "Could not compile feature pattern."); attributesBoundaryPattern = compilePattern("\\s*attributes(?=\\s|\\Z)", log, "The features boundary pattern did not compile."); featuresBoundaryPattern = compilePattern("\\s*features(?=\\s|\\Z)", log, "The features boundary pattern did not compile."); parametersBoundaryPattern = compilePattern("\\s*parameters(?=\\s|\\Z)", log, "The parameters boundary pattern did not compile."); outputPropertiesBoundaryPattern = compilePattern("\\s*output\\s+properties(?=\\s|\\Z)", log, "The output properties boundary pattern did not compile."); whitespacePattern = compilePattern("\\G(\\s+|\\Z)", log, "Could not compile whitespace pattern"); remainderOfString = compilePattern(".*", log, "Could not compile start pattern."); xmlEscapePattern = compilePattern("&(?:([a-zA-Z|_|:]\\w*)|#(x?)(\\d*))(;?)", log, "Could not compilet xml escape pattern."); // Build the XML Entity map xmlEntityMap.put("quot", "\""); xmlEntityMap.put("apos", "\'"); xmlEntityMap.put("amp", "&"); xmlEntityMap.put("lt", "<"); xmlEntityMap.put("gt", ">"); } /** * Unescape XML escape characters. * * @param xmlData The XML escaped string. * * @return The unescaped string. */ public static String unescapeXML(String xmlData) throws ParseException { StringBuffer escapedData = new StringBuffer(); Matcher match = xmlEscapePattern.matcher(xmlData); while (match.find()) { String entity = match.group(1); String hex = match.group(2); String charCode = match.group(3); String terminator = match.group(4); if (terminator != null && terminator.trim().length() != 0) { if (entity != null) { if (xmlEntityMap.containsKey(entity)) match.appendReplacement(escapedData, xmlEntityMap.get(entity)); else throw new ParseException("Unknown escape entity.", entity, match.regionEnd()); } else if (charCode != null) { int code; if (hex != null && hex.trim().length() != 0) // Hex (base 16) code = Integer.parseInt(charCode, 16); else // Decimal (base 10, default) code = Integer.parseInt(charCode); match.appendReplacement(escapedData, Character.toString((char)code)); } else { throw new ParseException("No escape data found.", match.regionEnd()); } } else { // Throw exception throw new ParseException("Improper data. & found but not terminated with ;", match.regionEnd()); } } match.appendTail(escapedData); return escapedData.toString(); } /** * Unescape parameter characters. * * \\ to \ * \' to ' * \" to " * * @param parameterValue The escaped string. * * @return The unescaped string. */ public static String unescapeParameter(String parameterValue) { return parameterValue.replaceAll("\\\\(\"|\'|\\\\)", "$1"); } /** * Parse a transformer processing instruction. * * @param data The data to parse. * @return A ParsedTransformer object containing the properties from the given data string. * * @throws ParseException If the given input data could not be properly parsed. */ public static ParsedTransformer parseTransformer( String data ) throws ParseException { ParsedTransformer parsedTransformer = null; // create a matcher to pass to the other functions. Matcher matcher = whitespacePattern.matcher( data ); // consume any leading whitespace. if( matcher.lookingAt() ) { advanceRegion(matcher); } // parse the rest of the data. parsedTransformer = parseTransformer( matcher ); // consume any tailing whitespace. if( matcher.lookingAt()) { advanceRegion(matcher); } // Make sure the entire string has been consumed. if( matcher.regionStart() != matcher.regionEnd() ) { throw new ParseException("Extra characters found.", data, matcher.regionStart()); } return parsedTransformer; } /** * Parse a transformer processing instruction. The given matcher will have the attributes, * parameters and output properties consumed. If properly parsed only whitespace should be * left in the matcher. * * @param matcher A matcher containing the parameter data with no leading whitespace. * @return A ParsedTransform object containing the properties from the matcher. * * @throws ParseException If the given input could not be properly parsed. */ private static ParsedTransformer parseTransformer( Matcher matcher ) throws ParseException { ParsedTransformer parsedTransformer = new ParsedTransformer(); // Retrieve the original pattern. Pattern originalPattern = matcher.pattern(); try { // Parse the attributes. parsedTransformer.setAttributes( parseAttributeMap( matcher ) ); if( matcher.usePattern(parametersBoundaryPattern).lookingAt() ) { // Matched on the parameter boundary pattern. // Advance past the boundary pattern. advanceRegion(matcher); // Parse the parameters. parsedTransformer.setParameters(parseAttributeMap( matcher )); } else { // Did not find the parameter boundary pattern. Assume no parameters. parsedTransformer.setParameters(new HashMap<String, String>()); } if( matcher.usePattern(outputPropertiesBoundaryPattern).lookingAt() ) { // Matched on the output properties boundary pattern. // Advance past the boundary pattern. advanceRegion(matcher); // Parse the output properties. parsedTransformer.setOutputProperties(parseAttributeMap( matcher )); } else { // No match on the output properties boundary pattern. Assume no output properties. parsedTransformer.setOutputProperties(new HashMap<String, String>()); } } finally { // Restore the original pattern. matcher.usePattern(originalPattern); } return parsedTransformer; } /** * Parsed the data for an xchain-transformer-factory processing instruction. */ public static ParsedTransformerFactory parseTransformerFactory( String data ) throws ParseException { ParsedTransformerFactory parsedTransformerFactory = null; // create a matcher to pass to the other functions. Matcher matcher = whitespacePattern.matcher( data ); // consume any leading whitespace. if( matcher.lookingAt() ) { advanceRegion(matcher); } // parse the rest of the data. parsedTransformerFactory = parseTransformerFactory( matcher ); // consume any tailing whitespace. if( matcher.lookingAt()) { advanceRegion(matcher); } // Make sure the entire string has been consumed. if( matcher.regionStart() != matcher.regionEnd() ) { throw new ParseException("Extra characters found.", data, matcher.regionStart()); } return parsedTransformerFactory; } private static ParsedTransformerFactory parseTransformerFactory( Matcher matcher ) throws ParseException { ParsedTransformerFactory parsedTransformerFactory = new ParsedTransformerFactory(); // Retrieve the original pattern. Pattern originalPattern = matcher.pattern(); try { // Parse the attributes. parsedTransformerFactory.setFields( parseAttributeMap( matcher ) ); if( matcher.usePattern(featuresBoundaryPattern).lookingAt() ) { // Matched on the features boundary pattern. // Advance past the boundary pattern. advanceRegion(matcher); // Parse the features. parsedTransformerFactory.setFeatures(parseFeatureMap( matcher )); } else { // No match on the features boundary pattern. Assume no features. parsedTransformerFactory.setFeatures(new HashMap<String, Boolean>()); } if( matcher.usePattern(attributesBoundaryPattern).lookingAt() ) { // Matched on the parameter boundary pattern. // Advance past the boundary pattern. advanceRegion(matcher); // Parse the parameters. parsedTransformerFactory.setAttributes(parseAttributeMap( matcher )); } else { // Did not find the parameter boundary pattern. Assume no parameters. parsedTransformerFactory.setAttributes(new HashMap<String, String>()); } } finally { // Restore the original pattern. matcher.usePattern(originalPattern); } return parsedTransformerFactory; } /** * Parse the given matcher into a map of attribute name to attribute value. * * @param matcher The matcher to parse through. * @return A mapping of attribute name to attribute value. * * @throws ParseException If an error was encountered parsing the given data. */ public static Map<String, String> parseAttributeMap( Matcher matcher ) throws ParseException { Map<String, String> attributeMap = new HashMap<String, String>(); Pattern originalPattern = matcher.pattern(); try { // set the pattern for attributes. matcher.usePattern(attributePattern); // consume as many attributes as possible. while(matcher.lookingAt()) { // xml unescape both groups. attributeMap.put(unescapeXML(matcher.group(1)), unescapeXML(matcher.group(3))); advanceRegion(matcher); } } finally { matcher.usePattern(originalPattern); } return attributeMap; } public static Map<String, Boolean> parseFeatureMap( Matcher matcher ) throws ParseException { Map<String, Boolean> featureMap = new HashMap<String, Boolean>(); Pattern originalPattern = matcher.pattern(); try { matcher.usePattern(featurePattern); // consume as many geatures as possible. featureMap.put(unescapeXML(matcher.group(1)), Boolean.valueOf(matcher.group(3))); } finally { matcher.usePattern(originalPattern); } return featureMap; } /** * Advances the region past the last match. * * @param matcher The matcher to advance. * @return The matcher advanced to the next region. */ public static Matcher advanceRegion( Matcher matcher ) { return matcher.region(matcher.regionStart()+matcher.group().length(), matcher.regionEnd()); } }