/* * * Copyright 2012 lexergen. * This file is part of lexergen. * * lexergen is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * lexergen is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with lexergen. If not, see <http://www.gnu.org/licenses/>. * * lexergen: * A tool to chunk source code into tokens for further processing in a compiler chain. * * Projectgroup: bi, bii * * Authors: Johannes Dahlke * * Module: Softwareprojekt Übersetzerbau 2012 * * Created: Apr. 2012 * Version: 1.0 * */ package de.fuberlin.bii.regextodfaconverter.directconverter.regex.operatortree; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Queue; import java.util.Stack; import java.util.concurrent.ArrayBlockingQueue; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.ItemAutomat; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.Lr1ItemAutomat; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.Slr1ItemAutomat; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.ContextFreeGrammar; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.EmptyString; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.Grammar; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.Nonterminal; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.ProductionRule; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.ProductionSet; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.RuleElement; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.Symbol; import de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar.Terminal; import de.fuberlin.bii.regextodfaconverter.directconverter.regex.RegexCharSet; import de.fuberlin.bii.regextodfaconverter.directconverter.regex.RegexSection; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.AbstractSyntaxTree; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.AttributesMap; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.SemanticRule; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.SemanticRules; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.SyntaxDirectedDefinition; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.Tree; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.TreeIterator; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.node.InnerNode; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.node.Leaf; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.node.TreeNode; import de.fuberlin.bii.regextodfaconverter.directconverter.syntaxtree.node.TreeNodeCollection; import de.fuberlin.bii.regextodfaconverter.fsm.StatePayload; import de.fuberlin.bii.utils.Test; /** * Operatorbaum für reguläre Ausdrücke. * * @author Johannes Dahlke * */ @SuppressWarnings("rawtypes") public class RegexOperatorTree<StatePayloadType extends Serializable> implements Tree, AttributizedOperatorTree { // definition of nonterminals private static final Nonterminal NONTERMINAL_R = new Nonterminal( "R"); private static final Nonterminal NONTERMINAL_S = new Nonterminal( "S"); private static final Nonterminal NONTERMINAL_T = new Nonterminal( "T"); private static final Nonterminal NONTERMINAL_U = new Nonterminal( "U"); private static final Nonterminal NONTERMINAL_V = new Nonterminal( "V"); private static final Nonterminal NONTERMINAL_Z = new Nonterminal( "Z"); private static final Nonterminal NONTERMINAL_REPETITION_COUNT = new Nonterminal( "RZ"); private static final Nonterminal NONTERMINAL_CLASS_SIGNUM = new Nonterminal( "CS"); private static final Nonterminal NONTERMINAL_CLASS_FIRST_ELEMENT = new Nonterminal( "CF"); private static final Nonterminal NONTERMINAL_CLASS_ELEMENTS = new Nonterminal( "CE"); private static final Nonterminal NONTERMINAL_CLASS_RANGE = new Nonterminal( "CR"); private static final Nonterminal NONTERMINAL_CLASS_FIRST_RANGE_BYPASS = new Nonterminal( "CFP"); private static final Nonterminal NONTERMINAL_CLASS_ELEMENTS_BYPASS = new Nonterminal( "CEP"); private static final Nonterminal NONTERMINAL_CV = new Nonterminal( "CV"); private static final Nonterminal NONTERMINAL_FIRST_CV = new Nonterminal( "CFV"); private static final Nonterminal START_SYMBOL = NONTERMINAL_R; // definition of terminals private static final Terminal<RegularExpressionElement> TERMINAL_TERMINATOR = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.TERMINATOR)); private static final Terminal<RegularExpressionElement> BRACKET_LEFT_GROUP = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_GROUP_BEGIN)); private static final Terminal<RegularExpressionElement> BRACKET_RIGHT_GROUP = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_GROUP_END)); private static final Terminal<RegularExpressionElement> BRACKET_LEFT_CLASS = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_BEGIN)); private static final Terminal<RegularExpressionElement> BRACKET_RIGHT_CLASS = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_END)); private static final Terminal<RegularExpressionElement> TERMINAL_MASK = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_MASK)); private static final Terminal<RegularExpressionElement> QUANTIFIER_KLEENE_CLOSURE = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_KLEENE_CLOSURE)); private static final Terminal<RegularExpressionElement> QUANTIFIER_POSITIVE_CLOSURE = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_POSITIVE_CLOSURE)); private static final Terminal<RegularExpressionElement> QUANTIFIER_OPTION = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_OPTION)); private static final Terminal<RegularExpressionElement> BRACKET_LEFT_QUANTIFIER = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_REPETITION_BEGIN)); private static final Terminal<RegularExpressionElement> BRACKET_RIGHT_QUANTIFIER = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_REPETITION_END)); private static final Terminal<RegularExpressionElement> OPERATOR_ALTERNATIVE = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_ALTERNATIVE)); @SuppressWarnings("unchecked") private static final Terminal<RegularExpressionElement> EMPTY_STRING = new EmptyString(); //private static final Terminal<RegularExpressionElement> OPERATOR_CONCATENATION = new Terminal<RegularExpressionElement>( new RegularExpressionElement( '.')); private static final Terminal<RegularExpressionElement> CLASSIFIER_JOKER = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_JOKER)); private static final Terminal<RegularExpressionElement> CLASSIFIER_CLASS_SIGNUM = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_SIGNUM)); private static final Terminal<RegularExpressionElement> OPERATOR_RANGE = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_RANGE)); private static final Terminal<RegularExpressionElement> SEPARATOR_QUANTIFIER = new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_QUANTIFIER_SEPARATOR)); // definitions of productions // Priority level 0 (Alternative = highest) private static final ProductionRule PRODUCTION_REGEX_ALTERNATIVE = new ProductionRule(NONTERMINAL_R, NONTERMINAL_R, OPERATOR_ALTERNATIVE, NONTERMINAL_S); private static final ProductionRule PRODUCTION_REGEX_ALTERNATIVE_BYPASS = new ProductionRule(NONTERMINAL_R, NONTERMINAL_S); //Priority level 1 (Concatenation) private static final ProductionRule PRODUCTION_REGEX_CONCATENATION = new ProductionRule(NONTERMINAL_S, NONTERMINAL_S, NONTERMINAL_T); private static final ProductionRule PRODUCTION_REGEX_CONCATENATION_BYPASS = new ProductionRule(NONTERMINAL_S, NONTERMINAL_T); private static final ProductionRule PRODUCTION_REGEX_EMPTY_STRING = new ProductionRule(NONTERMINAL_S, EMPTY_STRING); //Priority level 2 (Repetition) private static final ProductionRule PRODUCTION_REGEX_KLEENE_CLOSURE = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U, QUANTIFIER_KLEENE_CLOSURE); private static final ProductionRule PRODUCTION_REGEX_POSITIVE_CLOSURE = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U, QUANTIFIER_POSITIVE_CLOSURE); private static final ProductionRule PRODUCTION_REGEX_OPTION = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U, QUANTIFIER_OPTION); private static final ProductionRule PRODUCTION_REGEX_REPETITION_RANGE = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U, BRACKET_LEFT_QUANTIFIER, NONTERMINAL_REPETITION_COUNT, SEPARATOR_QUANTIFIER, NONTERMINAL_REPETITION_COUNT, BRACKET_RIGHT_QUANTIFIER); private static final ProductionRule PRODUCTION_REGEX_REPETITION_RANGE_TO_INFTY = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U, BRACKET_LEFT_QUANTIFIER, NONTERMINAL_REPETITION_COUNT, SEPARATOR_QUANTIFIER, BRACKET_RIGHT_QUANTIFIER); private static final ProductionRule PRODUCTION_REGEX_REPETITION = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U, BRACKET_LEFT_QUANTIFIER, NONTERMINAL_REPETITION_COUNT, BRACKET_RIGHT_QUANTIFIER); private static final ProductionRule PRODUCTION_REGEX_REPETITION_COUNT = new ProductionRule( NONTERMINAL_REPETITION_COUNT, NONTERMINAL_REPETITION_COUNT, NONTERMINAL_Z); private static final ProductionRule PRODUCTION_REGEX_REPETITION_COUNT_BYPASS = new ProductionRule( NONTERMINAL_REPETITION_COUNT, NONTERMINAL_Z); private static final ProductionRule PRODUCTION_REGEX_REPETITION_BYPASS = new ProductionRule(NONTERMINAL_T, NONTERMINAL_U); //Priority level 3 (Enclosure) // 3.1: Grouping private static final ProductionRule PRODUCTION_REGEX_GROUP = new ProductionRule(NONTERMINAL_U, BRACKET_LEFT_GROUP, NONTERMINAL_R, BRACKET_RIGHT_GROUP); // 3.1 Character class definition private static final ProductionRule PRODUCTION_REGEX_CLASS = new ProductionRule(NONTERMINAL_U, BRACKET_LEFT_CLASS, NONTERMINAL_CLASS_SIGNUM, NONTERMINAL_CLASS_FIRST_ELEMENT, NONTERMINAL_CLASS_ELEMENTS, BRACKET_RIGHT_CLASS); private static final ProductionRule PRODUCTION_REGEX_CLASS_SINGLE = new ProductionRule(NONTERMINAL_U, BRACKET_LEFT_CLASS, NONTERMINAL_CLASS_SIGNUM, NONTERMINAL_CLASS_FIRST_ELEMENT, BRACKET_RIGHT_CLASS); // 3.1.1 Invert class definition or not private static final ProductionRule PRODUCTION_REGEX_CLASS_SIGNUM_INVERT = new ProductionRule(NONTERMINAL_CLASS_SIGNUM, CLASSIFIER_CLASS_SIGNUM); private static final ProductionRule PRODUCTION_REGEX_CLASS_SIGNUM_RIGHT = new ProductionRule(NONTERMINAL_CLASS_SIGNUM, EMPTY_STRING); // 3.1.2 at least one character is expected to define a class private static final ProductionRule PRODUCTION_REGEX_CLASS_FIRST_RANGE_ELEMENT = new ProductionRule(NONTERMINAL_CLASS_FIRST_ELEMENT, NONTERMINAL_FIRST_CV, OPERATOR_RANGE, NONTERMINAL_CV); private static final ProductionRule PRODUCTION_REGEX_CLASS_FIRST_RANGE_BYPASS = new ProductionRule(NONTERMINAL_CLASS_FIRST_ELEMENT, NONTERMINAL_CLASS_FIRST_RANGE_BYPASS); private static final ProductionRule PRODUCTION_REGEX_CLASS_FIRST_SINGLE_ELEMENT = new ProductionRule( NONTERMINAL_CLASS_FIRST_RANGE_BYPASS, NONTERMINAL_FIRST_CV); private static final ProductionRule PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS = new ProductionRule(NONTERMINAL_CLASS_ELEMENTS, NONTERMINAL_CLASS_ELEMENTS, NONTERMINAL_CLASS_RANGE); private static final ProductionRule PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS_FINAL = new ProductionRule(NONTERMINAL_CLASS_ELEMENTS, NONTERMINAL_CLASS_RANGE); private static final ProductionRule PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS_BYPASS = new ProductionRule(NONTERMINAL_CLASS_ELEMENTS, NONTERMINAL_CLASS_ELEMENTS_BYPASS); private static final ProductionRule PRODUCTION_REGEX_CLASS_BYPASS_ELEMENTS = new ProductionRule(NONTERMINAL_CLASS_ELEMENTS, NONTERMINAL_CLASS_ELEMENTS, NONTERMINAL_CLASS_ELEMENTS_BYPASS); private static final ProductionRule PRODUCTION_REGEX_CLASS_RANGE = new ProductionRule(NONTERMINAL_CLASS_RANGE, NONTERMINAL_CV, OPERATOR_RANGE, NONTERMINAL_CV); private static final ProductionRule PRODUCTION_REGEX_CLASS_SINGLE_ELEMENT = new ProductionRule(NONTERMINAL_CLASS_ELEMENTS_BYPASS, NONTERMINAL_CV); private static final ProductionRule PRODUCTION_REGEX_JOKER_ELEMENT = new ProductionRule(NONTERMINAL_U, CLASSIFIER_JOKER); // 3.n bypass enclosure private static final ProductionRule PRODUCTION_REGEX_BRACKET_BYPASS = new ProductionRule(NONTERMINAL_U, NONTERMINAL_V); private AbstractSyntaxTree ast; private OperatorTreeAttributor<StatePayloadType> operatorTreeAttributor = new OperatorTreeAttributor<StatePayloadType>(); private TreeNode terminatorNode; @SuppressWarnings("unchecked") public RegexOperatorTree( RegularExpressionElement<StatePayloadType>[] regularExpression) throws Exception { super(); ContextFreeGrammar regexGrammar = getRegexGrammar(); SyntaxDirectedDefinition regexSdd = getRegexSdd(); extendGrammarAndSddWithTerminator( regexGrammar, regexSdd); // extends regex string regularExpression = Arrays.copyOf( regularExpression, regularExpression.length +1); regularExpression[regularExpression.length -1] = new RegularExpressionElement( RegexCharSet.TERMINATOR, null); ast = new AbstractSyntaxTree<RegularExpressionElement<StatePayloadType>>( regexGrammar, regexSdd, regularExpression) { @Override protected ItemAutomat<RegularExpressionElement<StatePayloadType>> getNewItemAutomat( Grammar grammar) { boolean usePersistentParserTable = true; return new Slr1ItemAutomat<RegularExpressionElement<StatePayloadType>>( (ContextFreeGrammar) grammar, usePersistentParserTable); } }; operatorTreeAttributor.attributizeOperatorTree( this); } /** * Gibt eine Grammatik für reguläre Ausdrücke zurück. * @return */ public static ContextFreeGrammar getRegexGrammar() { ContextFreeGrammar grammar = new ContextFreeGrammar(); ProductionSet productions = new ProductionSet(); // Priority level 0 (Alternative = highest) productions.add( PRODUCTION_REGEX_ALTERNATIVE); productions.add( PRODUCTION_REGEX_ALTERNATIVE_BYPASS); // Priority level 1 (Concatenation) productions.add( PRODUCTION_REGEX_CONCATENATION); productions.add( PRODUCTION_REGEX_CONCATENATION_BYPASS); productions.add( PRODUCTION_REGEX_EMPTY_STRING); // Priority level 2 (Repetition) productions.add( PRODUCTION_REGEX_KLEENE_CLOSURE); productions.add( PRODUCTION_REGEX_POSITIVE_CLOSURE); productions.add( PRODUCTION_REGEX_OPTION); productions.add( PRODUCTION_REGEX_REPETITION_BYPASS); // Priority level 2.1 (custom repetition) productions.add( PRODUCTION_REGEX_REPETITION_RANGE); productions.add( PRODUCTION_REGEX_REPETITION_RANGE_TO_INFTY); productions.add( PRODUCTION_REGEX_REPETITION); productions.add( PRODUCTION_REGEX_REPETITION_COUNT); productions.add( PRODUCTION_REGEX_REPETITION_COUNT_BYPASS); // Priority level 3 (Enclosure) productions.add( PRODUCTION_REGEX_GROUP); // 3.1 Character class definition productions.add( PRODUCTION_REGEX_CLASS); productions.add( PRODUCTION_REGEX_CLASS_SINGLE); // 3.1.1 Invert class definition or not productions.add( PRODUCTION_REGEX_CLASS_SIGNUM_INVERT); productions.add( PRODUCTION_REGEX_CLASS_SIGNUM_RIGHT); // 3.1.2 at least one character is expected to define a class productions.add( PRODUCTION_REGEX_CLASS_FIRST_RANGE_ELEMENT); productions.add( PRODUCTION_REGEX_CLASS_FIRST_RANGE_BYPASS); productions.add( PRODUCTION_REGEX_CLASS_FIRST_SINGLE_ELEMENT); productions.add( PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS); productions.add( PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS_FINAL); productions.add( PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS_BYPASS); productions.add( PRODUCTION_REGEX_CLASS_RANGE); productions.add( PRODUCTION_REGEX_CLASS_BYPASS_ELEMENTS); productions.add( PRODUCTION_REGEX_CLASS_SINGLE_ELEMENT); productions.add( PRODUCTION_REGEX_JOKER_ELEMENT); productions.add( PRODUCTION_REGEX_BRACKET_BYPASS); // MAIN Terminals for ( char c : RegexCharSet.getUnguardedCharsOfContext( RegexSection.MAIN)) { Terminal<RegularExpressionElement> terminal = new Terminal<RegularExpressionElement>( new RegularExpressionElement( (char) c)); productions.add( new ProductionRule(NONTERMINAL_V, terminal)); } for ( char c : RegexCharSet.getMetaCharsOfContext( RegexSection.MAIN)) { Terminal<RegularExpressionElement> metaTerminal = new Terminal<RegularExpressionElement>( new RegularExpressionElement( (char) c)); productions.add( new ProductionRule(NONTERMINAL_V, TERMINAL_MASK, metaTerminal)); } //REPETITION Terminals for ( int i = 0x30; i <= 0x39; i++) { Terminal<RegularExpressionElement> terminal = new Terminal<RegularExpressionElement>( new RegularExpressionElement( (char) i)); productions.add( new ProductionRule(NONTERMINAL_Z, terminal)); } // CLASS Terminals for ( char c : RegexCharSet.getUnguardedCharsOfContext( RegexSection.CHARACTER_CLASS)) { Terminal<RegularExpressionElement> terminal = new Terminal<RegularExpressionElement>( new RegularExpressionElement( (char) c)); productions.add( new ProductionRule(NONTERMINAL_CV, terminal)); productions.add( new ProductionRule(NONTERMINAL_FIRST_CV, terminal)); } productions.add( new ProductionRule(NONTERMINAL_CV, TERMINAL_MASK, new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_RANGE)))); productions.add( new ProductionRule(NONTERMINAL_CV, new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_SIGNUM)))); productions.add( new ProductionRule(NONTERMINAL_CV, TERMINAL_MASK, new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_END)))); productions.add( new ProductionRule(NONTERMINAL_CV, TERMINAL_MASK, TERMINAL_MASK)); productions.add( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_RANGE)))); productions.add( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_SIGNUM)))); productions.add( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, new Terminal<RegularExpressionElement>( new RegularExpressionElement( RegexCharSet.REGEX_CLASS_END)))); productions.add( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, TERMINAL_MASK)); // TODO: Regex Grammatik noch unvollständig grammar.addAll( productions); grammar.setStartSymbol( START_SYMBOL); return grammar; } /** * Gibt eine syntaxgerichtete Definition zur Übersetzung regulärer Ausdrücke in einen abstrakten Syntaxbaum an. * @return */ public static SyntaxDirectedDefinition getRegexSdd() { SyntaxDirectedDefinition result = new SyntaxDirectedDefinition(); // ++++++++++++++++++++++++++++++ // Priority level 0 (Alternative) // ++++++++++++++++++++++++++++++ // R -> R1 | S SemanticRules semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { OperatorNode nodeR = new OperatorNode( OperatorType.ALTERNATIVE); TreeNode nodeR1 = (TreeNode) attributesMaps[1].get( "node"); TreeNode nodeS = (TreeNode) attributesMaps[3].get( "node"); Serializable payload = ((Symbol) attributesMaps[2].get( "value")).getPayload(); tryPassPayloadDownwards( payload, nodeR1, nodeS); nodeR.setLeftChildNode( nodeR1); nodeR.setRightChildNode( nodeS); attributesMaps[0].put( "node", nodeR); // cNodeR++; } }); result.put( PRODUCTION_REGEX_ALTERNATIVE, semanticRules); // R -> S semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode nodeS = (TreeNode) attributesMaps[1].get( "node"); attributesMaps[0].put( "node", nodeS); // cNodeR++; } }); result.put( PRODUCTION_REGEX_ALTERNATIVE_BYPASS, semanticRules); // ++++++++++++++++++++++++++++++++ // Priority level 1 (Concatenation) // ++++++++++++++++++++++++++++++++ // S -> S1 T semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { OperatorNode nodeS = new OperatorNode( OperatorType.CONCATENATION); TreeNode nodeS1 = (TreeNode) attributesMaps[1].get( "node"); TreeNode nodeT = (TreeNode) attributesMaps[2].get( "node"); nodeS.setLeftChildNode( nodeS1); nodeS.setRightChildNode( nodeT); attributesMaps[0].put( "node", nodeS); } }); result.put( PRODUCTION_REGEX_CONCATENATION, semanticRules); // S -> T semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode nodeT = (TreeNode) attributesMaps[1].get( "node"); attributesMaps[0].put( "node", nodeT); } }); result.put( PRODUCTION_REGEX_CONCATENATION_BYPASS, semanticRules); // ----------------------------- // Empty string // ----------------------------- // S -> \epsilon semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode<Symbol> nodeTerminal = new TerminalNode( new RegularExpressionElement( RegexCharSet.EMPTY_STRING)); attributesMaps[0].put( "node", nodeTerminal); } }); result.put( PRODUCTION_REGEX_EMPTY_STRING, semanticRules); // +++++++++++++++++++++++++++++ // Priority level 2 (Repetition) // +++++++++++++++++++++++++++++ // T -> U* semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { RepetitionRange repetitionRange = new RepetitionRange( 0, Integer.MAX_VALUE); OperatorNode nodeT = new OperatorNode( OperatorType.REPETITION, repetitionRange); TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); Serializable payload = ((Symbol) attributesMaps[2].get( "value")).getPayload(); tryPassPayloadDownwards( payload, nodeU); nodeT.setLeftChildNode( nodeU); attributesMaps[0].put( "node", nodeT); } }); result.put( PRODUCTION_REGEX_KLEENE_CLOSURE, semanticRules); // T -> U+ (positive closure) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { OperatorNode nodeT = new OperatorNode( OperatorType.CONCATENATION); TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); Serializable payload = ((Symbol) attributesMaps[2].get( "value")).getPayload(); tryPassPayloadDownwards( payload, nodeU); // The first fix repetition nodeT.setLeftChildNode( nodeU); // All following optional repetitions RepetitionRange repetitionRange = new RepetitionRange( 0, Integer.MAX_VALUE); OperatorNode nodeKleeneClosure = new OperatorNode( OperatorType.REPETITION, repetitionRange); nodeKleeneClosure.setLeftChildNode( nodeU); nodeT.setRightChildNode( nodeKleeneClosure); // return new positive closure node attributesMaps[0].put( "node", nodeT); } }); result.put( PRODUCTION_REGEX_POSITIVE_CLOSURE, semanticRules); // T -> U? (option) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { OperatorNode nodeT = new OperatorNode( OperatorType.ALTERNATIVE); // cNodeR++; TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); Serializable payload = ((Symbol) attributesMaps[2].get( "value")).getPayload(); tryPassPayloadDownwards( payload, nodeU); // The possibility to accept the empty string nodeT.setLeftChildNode( new TerminalNode( new RegularExpressionElement( RegexCharSet.EMPTY_STRING))); // or to accept the expression in U nodeT.setRightChildNode( nodeU); // return new option node attributesMaps[0].put( "node", nodeT); } }); result.put( PRODUCTION_REGEX_OPTION, semanticRules); // T -> U (bypass) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); attributesMaps[0].put( "node", nodeU); } }); result.put( PRODUCTION_REGEX_REPETITION_BYPASS, semanticRules); // +++++++++++++++++++++++++++++ // Priority level 2.1 (Custom Repetition) // +++++++++++++++++++++++++++++ // T -> U { RZ , RZ1 } semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) throws OperatorTreeException { try { TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); int valueRZ = (Integer) attributesMaps[3].get( "value"); int valueRZ1 = (Integer) attributesMaps[5].get( "value"); List<TreeNode> repetitionNodes = new ArrayList<TreeNode>(); for ( int i = valueRZ; i <= valueRZ1; i++) { repetitionNodes.add( getRepeatitionOfNode( nodeU, i)); } TreeNode relRootNode = getNodesAsAlternatives( repetitionNodes); attributesMaps[0].put( "node", relRootNode); } catch (Exception e) { throw new OperatorTreeException( "Invalid regular expression. Cannot resolv custom repetition statement."); } } }); result.put( PRODUCTION_REGEX_REPETITION_RANGE, semanticRules); // T -> U { RZ , } semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) throws OperatorTreeException { try { // like T -> U { RZ } TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); int valueRZ = (Integer) attributesMaps[3].get( "value"); TreeNode relRootNode = getRepeatitionOfNode( nodeU, valueRZ); // add RZ * RepetitionRange repetitionRange = new RepetitionRange( 0, Integer.MAX_VALUE); OperatorNode nodeInfty = new OperatorNode( OperatorType.REPETITION, repetitionRange); nodeInfty.setLeftChildNode( (TreeNode) nodeU.clone()); TreeNode leftChild = relRootNode; TreeNode rightChild = nodeInfty; relRootNode = new OperatorNode( OperatorType.CONCATENATION); ((OperatorNode) relRootNode).setLeftChildNode( leftChild); ((OperatorNode) relRootNode).setRightChildNode( rightChild); attributesMaps[0].put( "node", relRootNode); } catch (Exception e) { throw new OperatorTreeException( "Invalid regular expression. Cannot resolv custom repetition statement."); } } }); result.put( PRODUCTION_REGEX_REPETITION_RANGE_TO_INFTY, semanticRules); // T -> U { RZ } semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) throws OperatorTreeException { try { TreeNode nodeU = (TreeNode) attributesMaps[1].get( "node"); int valueRZ = (Integer) attributesMaps[3].get( "value"); TreeNode relRootNode = getRepeatitionOfNode( nodeU, valueRZ); attributesMaps[0].put( "node", relRootNode); } catch (Exception e) { throw new OperatorTreeException( "Invalid regular expression. Cannot resolv custom repetition statement."); } } }); result.put( PRODUCTION_REGEX_REPETITION, semanticRules); // RZ -> RZ1 Z semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { int valueRZ1 = (Integer) attributesMaps[1].get( "value"); int valueZ = (Integer) attributesMaps[2].get( "value"); int valueRZ = valueRZ1 *10 + valueZ; attributesMaps[0].put( "value", valueRZ); } }); result.put( PRODUCTION_REGEX_REPETITION_COUNT, semanticRules); // RZ -> Z (bypass) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { attributesMaps[0].put( "value", attributesMaps[1].get( "value")); } }); result.put( PRODUCTION_REGEX_REPETITION_COUNT_BYPASS, semanticRules); // +++++++++++++++++++++++++++++ // Priority level 3 (Enclosure) // +++++++++++++++++++++++++++++ // U -> V (bypass) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode nodeV = (TreeNode) attributesMaps[1].get( "node"); attributesMaps[0].put( "node", nodeV); } }); result.put( PRODUCTION_REGEX_BRACKET_BYPASS, semanticRules); // U -> ( R ) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode nodeR = (TreeNode) attributesMaps[2].get( "node"); Serializable payload = ((Symbol) attributesMaps[3].get( "value")).getPayload(); tryPassPayloadDownwards( payload, nodeR); attributesMaps[0].put( "node", nodeR); } }); result.put( PRODUCTION_REGEX_GROUP, semanticRules); // ----------------------------- // 3.1 Character class definition // ----------------------------- // U -> [ CS CF CE ] and U -> [ CS CF ] semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { @SuppressWarnings("unchecked") public void apply( AttributesMap... attributesMaps) { OperatorNode nodeU = new OperatorNode( OperatorType.ALTERNATIVE); Boolean buildComplementClass = (Boolean) attributesMaps[2].get( "complement"); // determin the common payload Serializable commonPayload = ((Symbol) attributesMaps[1].get( "value")).getPayload(); if ( Test.isUnassigned( commonPayload)) commonPayload = (Serializable) attributesMaps[2].get( "payload"); List<Symbol> values = (List<Symbol>) attributesMaps[3].get( "values"); // in case of U -> [ CS CF CE ] if ( attributesMaps.length > 5 && Test.isAssigned( attributesMaps[4])) { values.addAll( (List<Symbol>) attributesMaps[4].get( "values")); if ( Test.isUnassigned( commonPayload)) commonPayload = ((Symbol) attributesMaps[5].get( "value")).getPayload(); } else { // in case of U -> [ CS CF ] if ( Test.isUnassigned( commonPayload)) commonPayload = ((Symbol) attributesMaps[4].get( "value")).getPayload(); } if ( buildComplementClass) { // build the complement class by exclusion of all chars mentioned in list values // and add the determine common payload to each of them List<Symbol> complementValues = new ArrayList<Symbol>(); for ( char c = RegexCharSet.getFirstAsciiChar(); c <= RegexCharSet.getLastAsciiChar(); c++) { RegularExpressionElement complementCharacterCandidate = new RegularExpressionElement( c, commonPayload); if ( !values.contains( complementCharacterCandidate)) complementValues.add( complementCharacterCandidate); } values = complementValues; } else { // otherwise, we use the given values. Finally we add the common payload to all that unassigned for ( Symbol value : values) { if ( Test.isUnassigned( value.getPayload())) value.setPayload( commonPayload); } } // convert values to nodes List<TreeNode> nodes = new ArrayList<TreeNode>(); for ( Symbol value : values) { nodes.add( new TerminalNode( value)); } TreeNode relRootNode = getNodesAsAlternatives( nodes); attributesMaps[0].put( "node", relRootNode); } }); result.put( PRODUCTION_REGEX_CLASS, semanticRules); result.put( PRODUCTION_REGEX_CLASS_SINGLE, semanticRules); // CS -> ^ semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { Object payload = ((Symbol) attributesMaps[1].get( "value")).getPayload(); attributesMaps[0].put( "complement", true); attributesMaps[0].put( "payload", payload); } }); result.put( PRODUCTION_REGEX_CLASS_SIGNUM_INVERT, semanticRules); // CS -> \epsilon semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { attributesMaps[0].put( "complement", false); } }); result.put( PRODUCTION_REGEX_CLASS_SIGNUM_RIGHT, semanticRules); // CE -> CE CEP semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { @SuppressWarnings("unchecked") public void apply( AttributesMap... attributesMaps) { List<Symbol> valuesCE = (List<Symbol>) attributesMaps[1].get( "values"); List<Symbol> valuesCEP = (List<Symbol>) attributesMaps[2].get( "values"); valuesCE.addAll( valuesCEP); attributesMaps[0].put( "values", valuesCE); } }); result.put( PRODUCTION_REGEX_CLASS_BYPASS_ELEMENTS, semanticRules); // alike CE -> CE CR result.put( PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS, semanticRules); // CE -> CEP semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { attributesMaps[0].put( "values", attributesMaps[1].get( "values")); } }); result.put( PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS_BYPASS, semanticRules); // alike CE -> CR result.put( PRODUCTION_REGEX_CLASS_RANGE_ELEMENTS_FINAL, semanticRules); // CF -> CFP result.put( PRODUCTION_REGEX_CLASS_FIRST_RANGE_BYPASS, semanticRules); // CEP -> CV semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { List<Symbol> values = new ArrayList<Symbol>(); values.add( (Symbol) attributesMaps[1].get( "value")); attributesMaps[0].put( "values", values); } }); result.put( PRODUCTION_REGEX_CLASS_SINGLE_ELEMENT, semanticRules); // alike CFP -> CFV result.put( PRODUCTION_REGEX_CLASS_FIRST_SINGLE_ELEMENT, semanticRules); // CF -> CFV - CFV whereas holds Ord(V) <= Ord(V1) semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { @SuppressWarnings("unchecked") public void apply( AttributesMap... attributesMaps) throws OperatorTreeException { List<Symbol> values = new ArrayList<Symbol>(); RegularExpressionElement valueV = (RegularExpressionElement) attributesMaps[1].get( "value"); Serializable commonPayload = ((Symbol) attributesMaps[2].get( "value")).getPayload(); RegularExpressionElement valueV1 = (RegularExpressionElement) attributesMaps[3].get( "value"); char valueVChar = (Character) valueV.getValue(); char valueV1Char = (Character) valueV1.getValue(); for ( char c = valueVChar; c <= valueV1Char; c++) { if ( c == valueVChar && Test.isAssigned( valueV.getPayload())) values.add( new RegularExpressionElement( c, valueV.getPayload())); else if ( c == valueV1Char && Test.isAssigned( valueV1.getPayload())) values.add( new RegularExpressionElement( c, valueV1.getPayload())); else values.add( new RegularExpressionElement( c, commonPayload)); } if ( values.isEmpty()) throw new OperatorTreeException( "Invalid regular expression. Empty range in character class."); attributesMaps[0].put( "values", values); } }); result.put( PRODUCTION_REGEX_CLASS_FIRST_RANGE_ELEMENT, semanticRules); // CR -> CV - CV result.put( PRODUCTION_REGEX_CLASS_RANGE, semanticRules); // ----------------------------- // 3.2 JOKER // ----------------------------- // U -> . semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { @SuppressWarnings("unchecked") public void apply( AttributesMap... attributesMaps) { Serializable payload = ((Symbol) attributesMaps[1].get( "value")).getPayload(); int firstChar = RegexCharSet.getFirstAsciiChar(); int lastChar = RegexCharSet.getLastAsciiChar(); RegularExpressionElement currentCharElement; List<TreeNode> nodes = new ArrayList<TreeNode>(); for ( int c = firstChar; c <= lastChar; c++) { currentCharElement = new RegularExpressionElement( (char) c, payload); nodes.add( new TerminalNode( currentCharElement)); } TreeNode jokerNode = getNodesAsAlternatives( nodes); attributesMaps[0].put( "node", jokerNode); } }); result.put( PRODUCTION_REGEX_JOKER_ELEMENT, semanticRules); // +++++++++++++++++++++++++++++ // Level 4 (Characters) // +++++++++++++++++++++++++++++ // Main Terminals // V -> a SemanticRules semanticRulesOfUnguardedTerminals = new SemanticRules(); semanticRulesOfUnguardedTerminals.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode<Symbol> nodeTerminal = new TerminalNode( (Symbol) attributesMaps[1].get( "value")); attributesMaps[0].put( "node", nodeTerminal); } }); // V -> \ a SemanticRules semanticRulesOfMetaTerminals = new SemanticRules(); semanticRulesOfMetaTerminals.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { TreeNode<Symbol> nodeTerminal = new TerminalNode( (Symbol) attributesMaps[2].get( "value")); attributesMaps[0].put( "node", nodeTerminal); } }); List<Character> unguardedChars = RegexCharSet.getUnguardedCharsOfContext( RegexSection.MAIN); for ( Terminal terminal : getRegexGrammar().getTerminals()) { if ( unguardedChars.contains( terminal.getSymbol().getValue())) result.put( new ProductionRule(NONTERMINAL_V, terminal), semanticRulesOfUnguardedTerminals); else result.put( new ProductionRule(NONTERMINAL_V, TERMINAL_MASK, terminal), semanticRulesOfMetaTerminals); } // QUANTIFIER values // Z -> 0..9 SemanticRules semanticRulesOfRepetitionValues = new SemanticRules(); semanticRulesOfRepetitionValues.add( new SemanticRule() { @SuppressWarnings("unchecked") public void apply( AttributesMap... attributesMaps) { Character nodeValue = ((RegularExpressionElement<StatePayload>) attributesMaps[1].get( "value")).getValue(); int intValue = Integer.valueOf( nodeValue + ""); attributesMaps[0].put( "value", intValue); } }); for ( int i = 0x30; i <= 0x39; i++) { Terminal<RegularExpressionElement> terminal = new Terminal<RegularExpressionElement>( new RegularExpressionElement( (char) i)); result.put( new ProductionRule(NONTERMINAL_Z, terminal), semanticRulesOfRepetitionValues); } // CLASS Values // CV -> a SemanticRules semanticRulesOfUnguardedClassValues = new SemanticRules(); semanticRulesOfUnguardedClassValues.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { attributesMaps[0].put( "value", attributesMaps[1].get( "value")); } }); // CV -> \ a SemanticRules semanticRulesOfMetaClassValues = new SemanticRules(); semanticRulesOfMetaClassValues.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { attributesMaps[0].put( "value", attributesMaps[2].get( "value")); } }); List<Character> charset = RegexCharSet.getCompleteDomain(); for ( Terminal terminal : getRegexGrammar().getTerminals()) { Character currentChar = (Character) terminal.getSymbol().getValue(); if ( charset.contains( currentChar)) { if ( currentChar == RegexCharSet.REGEX_RANGE) { result.put( new ProductionRule(NONTERMINAL_CV, TERMINAL_MASK, terminal), semanticRulesOfMetaClassValues); result.put( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, terminal), semanticRulesOfMetaClassValues); } else if ( currentChar == RegexCharSet.REGEX_CLASS_SIGNUM) { result.put( new ProductionRule(NONTERMINAL_CV, terminal), semanticRulesOfUnguardedClassValues); result.put( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, terminal), semanticRulesOfMetaClassValues); } else if ( currentChar == RegexCharSet.REGEX_CLASS_END) { result.put( new ProductionRule(NONTERMINAL_CV, TERMINAL_MASK, terminal), semanticRulesOfMetaClassValues); result.put( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, terminal), semanticRulesOfMetaClassValues); } else if ( currentChar == RegexCharSet.REGEX_MASK) { result.put( new ProductionRule(NONTERMINAL_CV, TERMINAL_MASK, TERMINAL_MASK), semanticRulesOfMetaClassValues); result.put( new ProductionRule(NONTERMINAL_FIRST_CV, TERMINAL_MASK, TERMINAL_MASK), semanticRulesOfMetaClassValues); } else { result.put( new ProductionRule(NONTERMINAL_CV, terminal), semanticRulesOfUnguardedClassValues); result.put( new ProductionRule(NONTERMINAL_FIRST_CV, terminal), semanticRulesOfUnguardedClassValues); } } } return result; } private static boolean testBranchNullablePosibility( TreeNode node) { if ( node instanceof OperatorNode) { OperatorNode operatorNode = (OperatorNode) node; switch ( operatorNode.getOperatorType()) { case ALTERNATIVE: return testBranchNullablePosibility( operatorNode.getLeftChildNode()); case CONCATENATION: return false; case REPETITION: return true; } } return false; } @SuppressWarnings("unchecked") protected static void tryPassPayloadDownwards( Serializable payload, TreeNode ... nodes) { for ( TreeNode node : nodes) { if ( node instanceof OperatorNode) { OperatorNode operatorNode = (OperatorNode) node; switch ( operatorNode.getOperatorType()) { case ALTERNATIVE: tryPassPayloadDownwards( payload, operatorNode.getLeftChildNode(), operatorNode.getRightChildNode()); break; case CONCATENATION: if ( testBranchNullablePosibility( operatorNode.getRightChildNode())) tryPassPayloadDownwards( payload, operatorNode.getLeftChildNode(), operatorNode.getRightChildNode()); else tryPassPayloadDownwards( payload, operatorNode.getRightChildNode()); break; case REPETITION: tryPassPayloadDownwards( payload, operatorNode.getLeftChildNode()); break; } } else if ( node instanceof TerminalNode) { TerminalNode terminalNode = (TerminalNode) node; Symbol terminalSymbol = terminalNode.getValue(); if ( Test.isAssigned( terminalSymbol) && Test.isUnassigned( terminalSymbol.getPayload())) terminalSymbol.setPayload( payload); } } } private static TreeNode getRepeatitionOfNode( TreeNode theNode, int times) throws CloneNotSupportedException { TreeNode<Symbol> nodeZero = new TerminalNode( new RegularExpressionElement( RegexCharSet.EMPTY_STRING)); TreeNode relRootNode = times > 0 ? (TreeNode) theNode.clone() : nodeZero; for ( int i = 1; i < times; i++) { TreeNode leftChild = relRootNode; TreeNode rightChild = (TreeNode) theNode.clone(); relRootNode = new OperatorNode( OperatorType.CONCATENATION); ((OperatorNode) relRootNode).setLeftChildNode( leftChild); ((OperatorNode) relRootNode).setRightChildNode( rightChild); } return relRootNode; } private static TreeNode getNodesAsAlternatives( Collection<TreeNode> theNodes) { Queue<TreeNode> nodesQueue = new ArrayBlockingQueue<TreeNode>( theNodes.size()); nodesQueue.addAll( theNodes); TreeNode relRootNode = nodesQueue.poll(); while ( !nodesQueue.isEmpty()) { TreeNode leftChild = relRootNode; TreeNode rightChild = nodesQueue.poll(); relRootNode = new OperatorNode( OperatorType.ALTERNATIVE); ((OperatorNode) relRootNode).setLeftChildNode( leftChild); ((OperatorNode) relRootNode).setRightChildNode( rightChild); } return relRootNode; } /** * Erweitert die Grammatik für reguläre Ausdrücke um das Terminatorsymbol. * */ private static void extendGrammarAndSddWithTerminator( Grammar grammar, SyntaxDirectedDefinition sdd) { // extends grammar Grammar extendedGrammar = grammar; Nonterminal embracingNonterminal = new Nonterminal( "_ROOT_"); Nonterminal priorStartSymbol = extendedGrammar.getStartSymbol(); // end rule ProductionRule terminatorProductionRule = new ProductionRule( embracingNonterminal, priorStartSymbol, TERMINAL_TERMINATOR); extendedGrammar.addProduction( terminatorProductionRule); extendedGrammar.setStartSymbol( embracingNonterminal); // embracingNonterminal -> previousStartSymbol SemanticRules semanticRules = new SemanticRules(); semanticRules.add( new SemanticRule() { public void apply( AttributesMap... attributesMaps) { OperatorNode embracingNonterminalNode = new OperatorNode( OperatorType.CONCATENATION); TreeNode priorStartSymbolNode = (TreeNode) attributesMaps[1].get( "node"); TreeNode<Symbol> terminatorNode = new TerminalNode( (Symbol) attributesMaps[2].get( "value")); embracingNonterminalNode.setLeftChildNode( priorStartSymbolNode); embracingNonterminalNode.setRightChildNode( terminatorNode); attributesMaps[0].put( "node", embracingNonterminalNode); } }); sdd.put( terminatorProductionRule, semanticRules); } public Iterator<TreeNode> iterator() { return new TreeIterator( this); } public TreeNode getRoot() { return (TreeNode) ast.getRootAttributesMap().get( "node"); } public Grammar getGrammar() { return ast.getGrammar(); } public HashMap<TreeNode, TreeNodeCollection> getFirstPositions() { return operatorTreeAttributor.getFirstPositions(); } public HashMap<TreeNode, TreeNodeCollection> getFollowPositions() { return operatorTreeAttributor.getFollowPositions(); } public HashMap<TreeNode, TreeNodeCollection> getLastPositions() { return operatorTreeAttributor.getLastPositions(); } public HashMap<TreeNode, Boolean> getNullables() { return operatorTreeAttributor.getNullables(); } public Collection<Leaf> getLeafSet() { Collection<Leaf> leafSet = new HashSet<Leaf>(); for ( TreeNode node : this) { if ( Test.isAssigned( node) && node instanceof TerminalNode) { leafSet.add( (Leaf) node); } } return leafSet; } /** * Gibt den terminalen Knoten zurück. * @return */ public TreeNode getTerminatorNode() { for ( TreeNode node : this) { if ( Test.isAssigned( node) && node instanceof TerminalNode && Test.isAssigned( node.getValue()) && node.getValue() instanceof Symbol && ((Symbol) node.getValue()).equals( TERMINAL_TERMINATOR.getSymbol())) return node; } return null; } @Override public String toString() { return ast.toString(); } }