/**
* Copyright 2010-2014 Three Crickets LLC.
* <p>
* The contents of this file are subject to the terms of a BSD license. See
* attached license.txt.
* <p>
* Alternatively, you can obtain a royalty free commercial license with less
* limitations, transferable or non-transferable, directly from Three Crickets
* at http://threecrickets.com/
*/
package org.sikuli.syntaxhighlight.grammar;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.sikuli.syntaxhighlight.ResolutionException;
import org.sikuli.syntaxhighlight.grammar.def.ChangeStateTokenRuleDef;
import org.sikuli.syntaxhighlight.grammar.def.SaveDef;
import org.sikuli.syntaxhighlight.grammar.def.TokenRuleDef;
import org.sikuli.syntaxhighlight.grammar.def.UsingRuleDef;
/**
 * A lexer that tokenizes text by trying the regular-expression rules of its
 * current state at the current position, emitting tokens for the first rule
 * that matches and optionally moving to another state.
 * <p>
 * The grammar is read from a JSON-like map in {@link #addJson(Map)}.
 *
 * @author Tal Liron
 */
public class RegexLexer extends Lexer
{
    //
    // Lexer
    //

    /**
     * Tokenizes the text, starting in the "root" state.
     * <p>
     * At each position the rules of the current state are tried in order and
     * the first pattern rule that matches is applied. If no rule matches, a
     * single character is consumed: a newline is emitted as a
     * {@link TokenType#Text} token, anything else as a
     * {@link TokenType#Error} token.
     *
     * @param text
     *        The text to tokenize
     * @return The tokens, in the order they were produced
     * @throws RuntimeException
     *         If a rule with several token types does not have exactly one
     *         regular expression group per token type
     */
    @Override
    public List<Token> getTokensUnprocessed( String text )
    {
        List<Token> tokens = new ArrayList<Token>();

        // Start at root state
        LinkedList<State> stateStack = new LinkedList<State>();
        State state = getState( "root" );
        stateStack.add( state );

        int pos = 0;
        int length = text.length();
        while( pos < length )
        {
            boolean matches = false;

            // Walk a snapshot of the rules: applying a SaveRule rewrites the
            // rule list of another state while we are iterating
            for( Rule rule : new ArrayList<Rule>( state.getRules() ) )
            {
                if( rule instanceof PatternRule )
                {
                    Matcher matcher = ( (PatternRule) rule ).getPattern().matcher( text );

                    // NOTE(review): the matcher keeps its default anchoring
                    // bounds, so "^" can match at pos even in the middle of a
                    // line -- confirm whether grammars rely on this
                    matcher.region( pos, length );
                    if( !matcher.lookingAt() )
                        continue;

                    // Remember where we were so that a match which makes no
                    // progress at all can be undone
                    int tokenCount = tokens.size();
                    int stackDepth = stateStack.size();
                    State previousState = state;

                    if( rule instanceof TokenRule )
                    {
                        TokenRule tokenRule = (TokenRule) rule;
                        emitMatchTokens( tokens, tokenRule, matcher, pos );
                        state = switchState( state, stateStack, tokenRule.getNextStates() );
                    }
                    else if( rule instanceof UsingRule )
                    {
                        // Delegate the matched text to another lexer.
                        // NOTE(review): the delegate tokenizes only the
                        // matched substring, so its token positions are
                        // presumably relative to the match rather than to
                        // the whole text -- verify against Token consumers
                        UsingRule usingRule = (UsingRule) rule;
                        Iterable<Token> usingTokens = usingRule.getLexer().getTokensUnprocessed( matcher.group() );
                        for( Token usingToken : usingTokens )
                            tokens.add( usingToken );
                    }

                    if( ( matcher.end() == pos ) && ( state == previousState ) && ( stateStack.size() == stackDepth ) )
                    {
                        // An empty match that changed nothing would be found
                        // again forever: drop whatever it emitted and try the
                        // next rule instead
                        tokens.subList( tokenCount, tokens.size() ).clear();
                        continue;
                    }

                    pos = matcher.end();
                    matches = true;

                    // Don't process other rules here
                    break;
                }
                else if( rule instanceof SaveRule )
                {
                    // Copy the current state's rules into the save state
                    State saveState = ( (SaveRule) rule ).getState();
                    if( saveState != state )
                    {
                        saveState.getRules().clear();
                        saveState.include( state );
                    }
                }
            }

            if( !matches )
            {
                if( text.charAt( pos ) != '\n' )
                    // Unmatched character
                    tokens.add( new Token( pos, TokenType.Error, text.substring( pos, pos + 1 ) ) );
                else
                    // Fallback for states that don't explicitly match new
                    // lines
                    tokens.add( new Token( pos, TokenType.Text, "\n" ) );
                pos += 1;
            }
        }

        return tokens;
    }

    /**
     * Reads the grammar definition.
     * <p>
     * Recognized keys:
     * <ul>
     * <li>"constants" (optional): a map of names to a pattern string or to an
     * array of strings that are concatenated into one pattern</li>
     * <li>"flags" (optional): an array of {@link Pattern} flag names, applied
     * to all token rules in addition to {@link Pattern#MULTILINE}</li>
     * <li>"states" (required): a map of state names to arrays of entries</li>
     * </ul>
     * Each state entry is an array whose first element is either a command
     * ("#include", "#using", "#save") or a pattern, optionally written as
     * "#constant:name1,name2" to concatenate constants. A pattern is followed
     * by a token type name (or array of names) and optionally by a next state
     * name (or array of names).
     *
     * @param json
     *        The grammar definition
     * @throws ResolutionException
     *         If the definition is malformed
     */
    @Override
    protected void addJson( Map<String, Object> json ) throws ResolutionException
    {
        super.addJson( json );

        Map<String, String> constants = readConstants( json.get( "constants" ) );
        int defaultFlags = readFlags( json.get( "flags" ) );

        Object statesObject = json.get( "states" );
        if( statesObject == null )
            throw new ResolutionException( "Grammar does not contain \"states\" map" );
        if( !( statesObject instanceof Map<?, ?> ) )
            throw new ResolutionException( "\"states\" must be a map" );

        for( Map.Entry<?, ?> entry : ( (Map<?, ?>) statesObject ).entrySet() )
        {
            String stateName = String.valueOf( entry.getKey() );
            Object stateObject = entry.getValue();
            if( !( stateObject instanceof Iterable<?> ) )
                throw new ResolutionException( "State \"" + stateName + "\" must be an array" );

            for( Object entryObject : (Iterable<?>) stateObject )
            {
                // Check the shape here so that a malformed grammar is reported
                // as a ResolutionException rather than a ClassCastException
                if( !( entryObject instanceof Iterable<?> ) )
                    throw new ResolutionException( "Entry in state \"" + stateName + "\" must be an array" );

                List<Object> argumentsList = new ArrayList<Object>();
                for( Object argument : (Iterable<?>) entryObject )
                    argumentsList.add( argument );

                readStateEntry( stateName, argumentsList, constants, defaultFlags );
            }
        }
    }

    // //////////////////////////////////////////////////////////////////////////
    // Private

    /**
     * Adds the tokens produced by a matching token rule: one token for the
     * whole match if the rule has a single token type, otherwise one token per
     * regular expression group.
     *
     * @param tokens
     *        The list to add the tokens to
     * @param tokenRule
     *        The rule that matched
     * @param matcher
     *        The matcher, positioned on the match
     * @param pos
     *        The position in the text where the match starts
     */
    private static void emitMatchTokens( List<Token> tokens, TokenRule tokenRule, Matcher matcher, int pos )
    {
        List<TokenType> tokenTypes = tokenRule.getTokenTypes();
        if( tokenTypes.size() == 1 )
        {
            // Single token
            tokens.add( new Token( pos, tokenTypes.get( 0 ), matcher.group() ) );
            return;
        }

        if( tokenTypes.size() != matcher.groupCount() )
            throw new RuntimeException( "The number of token types in the rule does not match the number of groups in the regular expression" );

        // Multiple tokens by group
        int group = 1;
        for( TokenType tokenType : tokenTypes )
        {
            String value = matcher.group( group );

            // A group that did not take part in the match has no text and no
            // position, so there is nothing to emit for it
            if( value != null )
                tokens.add( new Token( matcher.start( group ), tokenType, value ) );
            group++;
        }
    }

    /**
     * Applies the state changes of a matched rule, in order. A relative state
     * either pushes the current state again or pops a number of states off the
     * stack; any other state is switched to after pushing the current one.
     *
     * @param state
     *        The current state
     * @param stateStack
     *        The stack of states to return to; modified in place
     * @param nextStates
     *        The state changes to apply, or null for none
     * @return The state to continue in
     */
    private static State switchState( State state, LinkedList<State> stateStack, List<State> nextStates )
    {
        if( nextStates == null )
            return state;

        for( State nextState : nextStates )
        {
            if( nextState instanceof RelativeState )
            {
                RelativeState relativeState = (RelativeState) nextState;
                if( relativeState.isPush() )
                    // Push
                    stateStack.addLast( state );
                else
                    // Pop, stopping early if the stack runs out
                    for( int depth = relativeState.getDepth(); ( depth > 0 ) && !stateStack.isEmpty(); depth-- )
                        state = stateStack.removeLast();
            }
            else
            {
                // Push and switch
                stateStack.addLast( state );
                state = nextState;
            }
        }

        return state;
    }

    /**
     * Reads the "constants" map, joining array values into a single string.
     *
     * @param constantsObject
     *        The value of the "constants" key, or null if absent
     * @return The constant patterns by name; empty if there are none
     * @throws ResolutionException
     *         If the value is not a map of strings or arrays of strings
     */
    private static Map<String, String> readConstants( Object constantsObject ) throws ResolutionException
    {
        Map<String, String> constants = new HashMap<String, String>();
        if( constantsObject == null )
            return constants;
        if( !( constantsObject instanceof Map<?, ?> ) )
            throw new ResolutionException( "\"constants\" must be a map" );

        for( Map.Entry<?, ?> entry : ( (Map<?, ?>) constantsObject ).entrySet() )
        {
            Object constantObject = entry.getValue();
            String value;
            if( constantObject instanceof List<?> )
            {
                StringBuilder pattern = new StringBuilder();
                for( Object patternElement : (List<?>) constantObject )
                {
                    if( !( patternElement instanceof String ) )
                        throw new ResolutionException( "Unexpected value in \"constants\" map: " + constantObject );
                    pattern.append( (String) patternElement );
                }
                value = pattern.toString();
            }
            else if( constantObject instanceof String )
                value = (String) constantObject;
            else
                throw new ResolutionException( "Unexpected value in \"constants\" map: " + constantObject );

            constants.put( String.valueOf( entry.getKey() ), value );
        }

        return constants;
    }

    /**
     * Reads the "flags" array into {@link Pattern} flags.
     *
     * @param flagsObject
     *        The value of the "flags" key, or null if absent
     * @return The combined flags; {@link Pattern#MULTILINE} is always set
     * @throws ResolutionException
     *         If the value is not an array of recognized flag names
     */
    private static int readFlags( Object flagsObject ) throws ResolutionException
    {
        int flags = Pattern.MULTILINE;
        if( flagsObject == null )
            return flags;
        if( !( flagsObject instanceof List<?> ) )
            throw new ResolutionException( "\"flags\" must be an array of strings" );

        for( Object flagObject : (List<?>) flagsObject )
        {
            if( !( flagObject instanceof String ) )
                throw new ResolutionException( "\"flags\" must be an array of strings" );

            String flag = (String) flagObject;
            if( flag.equalsIgnoreCase( "CANON_EQ" ) )
                flags |= Pattern.CANON_EQ;
            else if( flag.equalsIgnoreCase( "CASE_INSENSITIVE" ) || flag.equalsIgnoreCase( "IGNORECASE" ) )
                flags |= Pattern.CASE_INSENSITIVE;
            else if( flag.equalsIgnoreCase( "COMMENTS" ) )
                flags |= Pattern.COMMENTS;
            else if( flag.equalsIgnoreCase( "DOTALL" ) )
                flags |= Pattern.DOTALL;
            else if( flag.equalsIgnoreCase( "LITERAL" ) )
                flags |= Pattern.LITERAL;
            else if( flag.equalsIgnoreCase( "MULTILINE" ) )
                flags |= Pattern.MULTILINE;
            else if( flag.equalsIgnoreCase( "UNICODE_CASE" ) )
                flags |= Pattern.UNICODE_CASE;
            else if( flag.equalsIgnoreCase( "UNIX_LINES" ) )
                flags |= Pattern.UNIX_LINES;
            else
                throw new ResolutionException( "\"flags\" contains an unrecognized flag: " + flag );
        }

        return flags;
    }

    /**
     * Reads one entry of a state and registers the matching definition on
     * that state.
     *
     * @param stateName
     *        The name of the state the entry belongs to
     * @param arguments
     *        The elements of the entry
     * @param constants
     *        The constant patterns by name
     * @param defaultFlags
     *        The {@link Pattern} flags for token rules
     * @throws ResolutionException
     *         If the entry is malformed
     */
    private void readStateEntry( String stateName, List<Object> arguments, Map<String, String> constants, int defaultFlags ) throws ResolutionException
    {
        if( arguments.isEmpty() )
            throw new ResolutionException( "Entry in state \"" + stateName + "\" must have at least one argument" );

        Object command = arguments.get( 0 );
        if( !( command instanceof String ) )
            throw new ResolutionException( "Entry in state \"" + stateName + "\" must have a string as the first argument" );

        if( command.equals( "#include" ) )
        {
            if( ( arguments.size() != 2 ) || !( arguments.get( 1 ) instanceof String ) )
                throw new ResolutionException( "\"#include\" command in state \"" + stateName + "\" must have a string as an argument" );
            include( stateName, (String) arguments.get( 1 ) );
        }
        else if( command.equals( "#using" ) )
        {
            if( ( arguments.size() != 3 ) || !( arguments.get( 1 ) instanceof String ) || !( arguments.get( 2 ) instanceof String ) )
                throw new ResolutionException( "\"#using\" command in state \"" + stateName + "\" must have two strings as arguments" );
            getState( stateName ).addDef( new UsingRuleDef( stateName, (String) arguments.get( 1 ), (String) arguments.get( 2 ) ) );
        }
        else if( command.equals( "#save" ) )
        {
            if( ( arguments.size() != 2 ) || !( arguments.get( 1 ) instanceof String ) )
                throw new ResolutionException( "\"#save\" command in state \"" + stateName + "\" must have one string as an argument" );
            getState( stateName ).addDef( new SaveDef( stateName, (String) arguments.get( 1 ) ) );
        }
        else
        {
            // Command is a pattern
            String pattern = (String) command;
            if( pattern.startsWith( "#constant:" ) )
            {
                // Concatenate the named constants
                StringBuilder builder = new StringBuilder();
                for( String constantName : pattern.substring( 10 ).split( "," ) )
                {
                    String constant = constants.get( constantName );
                    if( constant == null )
                        throw new ResolutionException( "Unknown constant \"" + constantName + "\" for #constant in state \"" + stateName + "\"" );
                    builder.append( constant );
                }
                pattern = builder.toString();
            }

            if( arguments.size() < 2 )
                throw new ResolutionException( "Rule in state \"" + stateName + "\" must have at least a token type as an argument" );

            List<String> tokenTypeNames = toStringList( arguments.get( 1 ), "Expected token type name or array of token type names in rule in state \"" + stateName + "\"" );

            if( arguments.size() == 2 )
            {
                // Token rule
                getState( stateName ).addDef( new TokenRuleDef( stateName, pattern, defaultFlags, tokenTypeNames ) );
            }
            else if( arguments.size() == 3 )
            {
                // Change state token rule
                List<String> nextStateNames = toStringList( arguments.get( 2 ), "Expected state name or array of state names in rule in state \"" + stateName + "\"" );
                getState( stateName ).addDef( new ChangeStateTokenRuleDef( stateName, pattern, defaultFlags, tokenTypeNames, nextStateNames ) );
            }
            else
                throw new ResolutionException( "Too many arguments for rule in state \"" + stateName + "\"" );
        }
    }

    /**
     * Converts a value that is either a single string or a list of strings
     * into a new list of strings.
     *
     * @param value
     *        The value to convert
     * @param errorMessage
     *        The message to report if the value has another shape
     * @return A new list containing the string or strings
     * @throws ResolutionException
     *         If the value is neither a string nor a list of strings
     */
    private static List<String> toStringList( Object value, String errorMessage ) throws ResolutionException
    {
        List<String> strings = new ArrayList<String>();
        if( value instanceof String )
            strings.add( (String) value );
        else if( value instanceof List<?> )
        {
            for( Object element : (List<?>) value )
            {
                if( !( element instanceof String ) )
                    throw new ResolutionException( errorMessage );
                strings.add( (String) element );
            }
        }
        else
            throw new ResolutionException( errorMessage );
        return strings;
    }
}