/**
* Copyright 2011 meltmedia
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.xchain.framework.osgi;
import static org.xchain.framework.util.ParserUtil.advanceRegion;
import static org.xchain.framework.util.RegExUtil.compilePattern;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.regex.Matcher;
import org.xchain.framework.util.ParseException;
/**
* A parser for OSGi Manifest headers.
* TODO: Rework error handling in this file.
* TODO: Remove unneeded regular expressions from file.
* TODO: Combine expressions where possible to reduce the amount of matching.
*
* @author Christian Trimble
* @author John Trimble
*/
public class ManifestParser
{
  // Building blocks of the OSGi header grammar. Only TOKEN_REGEX and the two
  // whitespace expressions are currently used to build patterns in this class.
  private static final String DIGIT_REGEX = "[0-9]";
  private static final String ALPHA_REGEX = "[a-zA-Z]";
  private static final String ALPHA_NUM_REGEX = "[a-zA-Z0-9]";
  private static final String TOKEN_REGEX = "[a-zA-Z0-9_-]+";
  private static final String ZERO_OR_MORE_WHITESPACE_REGEX = "\\s*";
  private static final String ONE_OR_MORE_WHITESPACE_REGEX = "\\s+";

  /**
   * extended ::= ( alphanum | '_' | '-' | '.' )+
   */
  public static final String EXTENDED_REGEX = "[-a-zA-Z0-9_.]+";

  /**
   * quoted-string ::= '"' ( [^"\#x0D#x0A#x00] | '\"' | '\\' )* '"'
   *
   * The body is either a plain character (anything except a quote, a backslash, CR, LF or NUL) or one of the
   * two escape sequences: backslash-quote and backslash-backslash.  The two alternatives cannot match the same
   * character, so the repetition does not backtrack ambiguously.
   */
  public static final String QUOTED_STRING_REGEX = "\"(?:[^\"\\\\\\x0D\\x0A\\x00]|\\\\[\"\\\\])*\"";

  public static final String COLON_EQUALS_REGEX = ":=";
  public static final String EQUALS_REGEX = "=";

  /**
   * This has been changed to allow spaces and tabs.  Group 1 captures the path without the surrounding quotes.
   *
   * path ::= path-unquoted | ('"' path-unquoted '"')
   * path-unquoted ::= path-sep | path-sep? path-element (path-sep path-element)*
   * path-element ::= [^/"\#x0D#x0A#x00]+
   * path-sep ::= '/'
   */
  public static final String QUOTED_PATH_REGEX = "\"((?:/?[^/\"\\\\\\x0D\\x0A\\x00]+(?:/[^/\"\\\\\\x0D\\x0A\\x00]+)*)|/)\"";

  /**
   * This has been modified to remove ';' and ',' characters.  Without these changes, this could consume too many characters.
   *
   * path ::= path-unquoted | ('"' path-unquoted '"')
   * path-unquoted ::= path-sep | path-sep? path-element (path-sep path-element)*
   * path-element ::= [^/"\#x0D#x0A#x00]+
   * path-sep ::= '/'
   */
  public static final String UNQUOTED_PATH_REGEX = "(?:(?:/?[^/\"\\\\\\x0D\\x0A\\x00;,]+(?:/[^/\"\\\\\\x0D\\x0A\\x00;,]+)*)|/)";

  /**
   * Lookahead expression for the start of a parameter: a token, optional whitespace, then '=' or ':='.
   */
  public static final String START_PARAMETER_REGEX = TOKEN_REGEX+"\\s*:?=";
  public static final String QUOTE_REGEX = "\"";
  public static final String SEMICOLON_REGEX = ";";

  // Every pattern is compiled exactly once, when the class is initialized.
  static final Pattern TOKEN_PATTERN = compilePattern(TOKEN_REGEX, "Could not compile token pattern.");
  static final Pattern EXTENDED_PATTERN = compilePattern(EXTENDED_REGEX, "Could not compile extended pattern.");
  static final Pattern QUOTED_STRING_PATTERN = compilePattern(QUOTED_STRING_REGEX, "Could not compile quoted-string pattern.");
  static final Pattern COLON_EQUALS_PATTERN = compilePattern(COLON_EQUALS_REGEX, "Could not compile colon equals pattern.");
  static final Pattern EQUALS_PATTERN = compilePattern(EQUALS_REGEX, "Could not compile equals pattern.");
  static final Pattern ZERO_OR_MORE_WHITESPACE_PATTERN = compilePattern(ZERO_OR_MORE_WHITESPACE_REGEX, "Could not compile whitespace pattern.");
  static final Pattern ONE_OR_MORE_WHITESPACE_PATTERN = compilePattern(ONE_OR_MORE_WHITESPACE_REGEX, "Could not compile whitespace pattern.");
  static final Pattern QUOTED_PATH_PATTERN = compilePattern(QUOTED_PATH_REGEX, "Could not compile the quoted path pattern.");
  static final Pattern UNQUOTED_PATH_PATTERN = compilePattern(UNQUOTED_PATH_REGEX, "Could not compile the unquoted path pattern.");
  static final Pattern START_PARAMETER_PATTERN = compilePattern(START_PARAMETER_REGEX, "Could not compile the start parameter pattern.");
  static final Pattern QUOTE_PATTERN = compilePattern(QUOTE_REGEX, "Could not compile quote pattern.");
  static final Pattern SEMICOLON_PATTERN = compilePattern(SEMICOLON_REGEX, "Could not compile semicolon pattern.");
  static final Pattern COMMA_PATTERN = compilePattern(",", "Could not compile comma pattern.");

  /**
   * This method parses the Bundle-ClassPath manifest header.
   *
   * Bundle-ClassPath BNF: (OSGi 4.2 Specification - 3.8.1)
   * Bundle-ClassPath ::= entry ( ',' entry )*
   * entry ::= target ( ';' target )* ( ';' parameter )*
   * target ::= path | '.'
   *
   * Path BNF: (OSGi 4.2 Specification - 1.3.2)
   * path ::= path-unquoted | ('"' path-unquoted '"')
   * path-unquoted ::= path-sep | path-sep? path-element (path-sep path-element)*
   * path-element ::= [^/"\#x0D#x0A#x00]+
   * path-sep ::= '/'
   *
   * @param bundleClassPath the value of the Bundle-ClassPath header.
   * @return the entries found in the header, in the order they appear.
   * @throws Exception a RuntimeException is thrown when a target or a parameter cannot be parsed.
   */
  public static List<ParsedClassPathEntry> parseClassPathEntries( String bundleClassPath )
    throws Exception
  {
    List<ParsedClassPathEntry> entryList = new ArrayList<ParsedClassPathEntry>();

    // A single matcher is threaded through all of the parse methods.  Each method swaps in the pattern
    // it needs and shrinks the region as input is consumed.
    Matcher matcher = ZERO_OR_MORE_WHITESPACE_PATTERN.matcher(bundleClassPath);

    // remove leading whitespace.
    consumeWhitespace(matcher, false);

    // the grammar requires at least one entry.
    entryList.add(parseClassPathEntry(matcher));
    consumeWhitespace(matcher, false);

    // every following entry is introduced by a comma.
    while( lookingAt(matcher, COMMA_PATTERN) ) {
      advanceRegion(matcher);
      consumeWhitespace(matcher, false);
      entryList.add(parseClassPathEntry(matcher));
      consumeWhitespace(matcher, false);
    }

    // TODO: Make sure that we consumed all of the header.  Anything left in the region at this
    // point is currently ignored.
    return entryList;
  }

  /**
   * Parses a single class path entry from the matcher.
   *
   * entry ::= target ( ';' target )* ( ';' parameter )*
   *
   * @param matcher the matcher positioned at the start of an entry.
   * @return the parsed entry, holding its targets and parameters.
   * @throws Exception a RuntimeException is thrown when a target or a parameter cannot be parsed.
   */
  static ParsedClassPathEntry parseClassPathEntry( Matcher matcher )
    throws Exception
  {
    ParsedClassPathEntry entry = new ParsedClassPathEntry();
    Pattern originalPattern = matcher.pattern();
    try {
      entry.getTargetList().add(parseTarget(matcher));
      consumeWhitespace( matcher, false );

      // A ';' may introduce either another target or the first parameter, so some lookahead is
      // needed here to properly parse this part of the grammar.
      while( lookingAt(matcher, SEMICOLON_PATTERN) ) {
        int semicolonStart = matcher.regionStart();
        advanceRegion( matcher );
        consumeWhitespace( matcher, false );

        if( lookingAt(matcher, START_PARAMETER_PATTERN) ) {
          // This is a parameter.  Rewind to the semicolon so the parameter loop below sees it, and
          // leave the target loop; without the break the same semicolon would be matched forever.
          matcher.region(semicolonStart, matcher.regionEnd());
          break;
        }

        // this could not be a parameter, so consume it as a target.
        entry.getTargetList().add(parseTarget(matcher));
        consumeWhitespace( matcher, false );
      }

      while( lookingAt(matcher, SEMICOLON_PATTERN) ) {
        advanceRegion( matcher );
        consumeWhitespace( matcher, false );
        entry.getParameterList().add(parseParameter(matcher));
        // allow whitespace between a parameter and the next ';', as is done for targets.
        consumeWhitespace( matcher, false );
      }
    }
    finally {
      matcher.usePattern(originalPattern);
    }
    return entry;
  }

  /**
   * Parses a target, which is either a quoted or an unquoted path.
   *
   * target ::= path | '.'
   * path ::= path-unquoted | ('"' path-unquoted '"')
   * path-unquoted ::= path-sep | path-sep? path-element (path-sep path-element)*
   * path-element ::= [^/"\#x0D#x0A#x00]+
   * path-sep ::= '/'
   *
   * @param matcher the matcher positioned at the start of a target.
   * @return the path, without surrounding quotes.
   * @throws Exception a RuntimeException is thrown when no path can be parsed.
   */
  static String parseTarget( Matcher matcher )
    throws Exception
  {
    Pattern originalPattern = matcher.pattern();
    try {
      // a leading quote decides which form of path is expected.
      if( lookingAt(matcher, QUOTE_PATTERN) ) {
        return parseQuotedPath(matcher);
      }
      return parseUnquotedPath(matcher);
    }
    finally {
      matcher.usePattern(originalPattern);
    }
  }

  /**
   * Parses a path that is surrounded by double quotes and consumes it from the matcher.
   *
   * @param matcher the matcher positioned at the opening quote.
   * @return the path between the quotes.
   * @throws Exception a RuntimeException is thrown when the matcher is not looking at a quoted path.
   */
  static String parseQuotedPath( Matcher matcher )
    throws Exception
  {
    Pattern originalPattern = matcher.pattern();
    try {
      if( !lookingAt( matcher, QUOTED_PATH_PATTERN ) ) {
        throw new RuntimeException("Could not parse quoted path.");
      }
      // group 1 is the path without the quotes.  It must be read before the region is advanced.
      String path = matcher.group(1);
      advanceRegion( matcher );
      return path;
    }
    finally {
      matcher.usePattern(originalPattern);
    }
  }

  /**
   * Parses a path that is not surrounded by quotes and consumes it from the matcher.  Leading and trailing
   * whitespace is trimmed from the result.
   *
   * @param matcher the matcher positioned at the start of the path.
   * @return the trimmed path.
   * @throws Exception a RuntimeException is thrown when the matcher is not looking at an unquoted path.
   */
  static String parseUnquotedPath( Matcher matcher )
    throws Exception
  {
    Pattern originalPattern = matcher.pattern();
    try {
      if( !lookingAt( matcher, UNQUOTED_PATH_PATTERN ) ) {
        throw new RuntimeException("Could not parse unquoted path.");
      }
      // the unquoted expression allows spaces, so whitespace before a separator is trimmed here.
      String path = matcher.group().trim();
      advanceRegion( matcher );
      return path;
    }
    finally {
      matcher.usePattern(originalPattern);
    }
  }

  /**
   * Parses a parameter from the specified matcher.  The matcher passed to this method must be looking at the start of a parameter.
   *
   * parameter ::= directive | attribute
   * directive ::= token ':=' argument
   * attribute ::= token '=' argument
   *
   * @param matcher the matcher to parse the parameter from.
   * @return the parsed parameter.
   * @throws Exception a RuntimeException is thrown when the name, the operator or the argument is missing.
   */
  static ParsedParameter parseParameter( Matcher matcher )
    throws Exception
  {
    ParsedParameter parameter = new ParsedParameter();
    Pattern originalPattern = matcher.pattern();
    try {
      // the name.
      if( !lookingAt(matcher, TOKEN_PATTERN) ) {
        throw new RuntimeException("Could not find token at start of parameter.");
      }
      parameter.setName(matcher.group());
      advanceRegion(matcher);
      consumeWhitespace(matcher, false);

      // the operator decides the parameter type.  ':=' is tested first; '=' alone cannot match at a ':'.
      if( lookingAt( matcher, COLON_EQUALS_PATTERN ) ) {
        parameter.setType(ParameterType.DIRECTIVE);
      }
      else if( lookingAt( matcher, EQUALS_PATTERN ) ) {
        parameter.setType(ParameterType.ATTRIBUTE);
      }
      else {
        throw new RuntimeException("Expecting := or =");
      }
      advanceRegion(matcher);
      consumeWhitespace(matcher, false);

      // the value.
      parameter.setValue(parseArgument(matcher));
    }
    finally {
      matcher.usePattern(originalPattern);
    }
    return parameter;
  }

  /**
   * Consumes whitespace at the start of the matcher's region.
   *
   * @param matcher the matcher that the whitespace will be consumed from.
   * @param mandatory if true, the whitespace in this location is mandatory, otherwise
   * the whitespace is optional.
   * @throws Exception a RuntimeException is thrown when mandatory whitespace is missing.
   */
  static void consumeWhitespace( Matcher matcher, boolean mandatory )
    throws Exception
  {
    Pattern originalPattern = matcher.pattern();
    try {
      Pattern whitespacePattern = mandatory ? ONE_OR_MORE_WHITESPACE_PATTERN : ZERO_OR_MORE_WHITESPACE_PATTERN;
      // the optional pattern also matches the empty string, so only the mandatory form can fail.
      if( !lookingAt( matcher, whitespacePattern ) ) {
        throw new RuntimeException("Expected whitespace, but there was none.");
      }
      advanceRegion(matcher);
    }
    finally {
      matcher.usePattern(originalPattern);
    }
  }

  /**
   * Parses an argument from the OSGi Core Specification and consumes it from the matcher.  Any quoted string
   * is unescaped by this method.
   *
   * extended ::= ( alphanum | '_' | '-' | '.' )+
   * quoted-string ::= '"' ( [^"\#x0D#x0A#x00] | '\"' | '\\' )* '"'
   * argument ::= extended | quoted-string
   *
   * @param matcher the matcher positioned at the start of an argument.
   * @return the argument value, with quotes removed and escapes resolved for quoted strings.
   * @throws Exception a RuntimeException is thrown when the matcher is not looking at an argument.
   */
  static String parseArgument( Matcher matcher )
    throws Exception
  {
    Pattern originalPattern = matcher.pattern();
    try {
      String argument;
      if( lookingAt( matcher, EXTENDED_PATTERN ) ) {
        argument = matcher.group();
      }
      else if( lookingAt( matcher, QUOTED_STRING_PATTERN ) ) {
        argument = unescapeQuotedString(matcher.group());
      }
      else {
        // TODO: Better error handling here.
        throw new RuntimeException("Could not find argument.");
      }
      // Consume the argument, as every other parse method does for its match; otherwise the caller
      // would never see the ';' or ',' that follows it.
      advanceRegion(matcher);
      return argument;
    }
    finally {
      matcher.usePattern(originalPattern);
    }
  }

  /**
   * Removes the surrounding quotation marks and unescapes '"' and '\' characters in a quoted string.  This method assumes
   * that the string passed in conforms to the definition of a quoted-string found in the osgi core specification.  If the string
   * is malformed, the results of this method are unspecified.
   *
   * @param quotedString a string that conforms to the quoted-string BNF found in the OSGi Core Specification.
   * @return the unescaped string.
   */
  static String unescapeQuotedString(String quotedString)
  {
    // first strip the enclosing quotes, then replace each escape sequence with the character it escapes.
    return quotedString.replaceAll("\\A\"(.*)\"\\Z", "$1").replaceAll("\\\\([\\\\\"])", "$1");
  }

  /**
   * Returns the token at the start of the matcher's region.  Unlike the other parse methods in this class, this
   * method does not advance the region past the token, and the matcher's original pattern is restored before returning.
   *
   * digit ::= [0..9]
   * alpha ::= [a..zA..Z]
   * alphanum ::= alpha | digit
   * token ::= ( alphanum | '_' | '-' )+
   *
   * @param matcher the matcher positioned at the start of a token.
   * @return the token text.
   * @throws ParseException declared for callers; a missing token is currently reported with a RuntimeException.
   */
  static String parseToken( Matcher matcher )
    throws ParseException
  {
    Pattern originalPattern = matcher.pattern();
    try {
      matcher.usePattern(TOKEN_PATTERN);
      if( !matcher.lookingAt() ) {
        throw new RuntimeException("Could not parse token at "+matcher.regionStart());
      }
      return matcher.group();
    }
    finally {
      matcher.usePattern(originalPattern);
    }
  }

  /**
   * Returns true if the matcher is looking at the specified pattern, false otherwise.  If true is returned, the pattern for the matcher is the pattern specified,
   * so the caller can read the match.  If false is returned, then the pattern for the matcher is not changed.
   * NOTE: This should be moved to the RegexUtil.
   *
   * @param matcher the matcher to test.
   * @param pattern the pattern to look for at the start of the matcher's region.
   * @return true if the region starts with a match of the pattern, false otherwise.
   */
  static boolean lookingAt( Matcher matcher, Pattern pattern )
    throws Exception
  {
    Pattern originalPattern = matcher.pattern();
    matcher.usePattern(pattern);
    if( matcher.lookingAt() ) {
      return true;
    }
    // no match, so put the caller's pattern back.
    matcher.usePattern(originalPattern);
    return false;
  }
}