/** * Copyright (c) Cohesive Integrations, LLC * Copyright (c) Codice Foundation * * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 3 of the License, or any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. A copy of the GNU Lesser General Public License is distributed along with this program and can be found at * <http://www.gnu.org/licenses/lgpl.html>. * **/ package net.di2e.ecdr.querylanguage.basic.keywordparser; import org.parboiled.Action; import org.parboiled.BaseParser; import org.parboiled.Context; import org.parboiled.Rule; import org.parboiled.annotations.BuildParseTree; import org.parboiled.annotations.SuppressNode; import org.parboiled.support.StringVar; @SuppressWarnings( { "InfiniteRecursion" } ) @BuildParseTree public class KeywordTextParser extends BaseParser<ASTNode> { protected static final String OR_STRING = "OR"; protected static final String AND_STRING = "AND"; protected static final String NOT_STRING = "NOT"; protected static final String SPACE_STRING = " "; final Rule orOperatorRule = terminal( OR_STRING ); final Rule andOperatorRule = terminal( AND_STRING ); final Rule notOperatorRule = terminal( NOT_STRING ); final Rule leftParenRule = terminal( "(" ); final Rule rightParenRule = terminal( ")" ); final Rule doubleQuoteRule = terminal( "\"" ); final Rule spaceRule = terminal( SPACE_STRING ); // This method exists to detect end of input. public Rule inputPhrase() { return Sequence( keywordQueryExpression(), EOI ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <keyword-query-expression> ::= <term> (<boolean-operator> <term>)*; * <p> * The implementation was changed to allow whitespace. <br/> * keyword query expression = optional whitespace, term, {boolean operator, * term}, optional whitespace; * </p> */ Rule keywordQueryExpression() { StringVar operator = new StringVar(); return Sequence( optionalWhiteSpace(), term(), ZeroOrMore( booleanOperator(), operator.set( match() ), term(), push( new OperatorASTNode( operator.get(), pop( 1 ), pop() ) ) ), optionalWhiteSpace() ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <boolean-operator> ::= <and> | <or> | <not>; * <p> * The implementation was changed to evaluate OR and NOT first, so all * spaces aren't evaluated as ANDs. <br/> * boolean operator = or | not | and; * </p> */ Rule booleanOperator() { return FirstOf( or(), not(), and() ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <and> ::= “ AND ” | “ ”; * <p> * The implementation was changed to allow whitespace and to not require boolean operators to be wrapped in spaces. * <br/> * and = (optional whitespace, "AND", optional whitespace) | mandatory whitespace; * </p> */ Rule and() { return FirstOf( Sequence( optionalWhiteSpace(), andOperatorRule, optionalWhiteSpace() ), mandatoryWhiteSpace() ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <or> ::= “ OR ”; * <p> * The implementation was changed to allow whitespace and to not require boolean operators to be wrapped in spaces. * <br/> * or = (optional whitespace, "OR", optional whitespace); * </p> */ Rule or() { return Sequence( optionalWhiteSpace(), orOperatorRule, optionalWhiteSpace() ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <not> ::= “ NOT ”; * <p> * The implementation was changed to allow whitespace and to not require boolean operators to be wrapped in spaces. * <br/> * not = (optional whitespace, "NOT", optional whitespace); * </p> */ Rule not() { return Sequence( optionalWhiteSpace(), notOperatorRule, optionalWhiteSpace() ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <term> ::= <keyword> | <phrase> | <group>; * <p> * The implementation was changed to evaluate the most specific rule first. * <br/> * term = group | phrase | keyword; * </p> */ Rule term() { return FirstOf( group(), phrase(), keyword() ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <phrase> ::= '"' <keyword> (' '<keyword>)* '"'; * <p> * The implementation was changed to allow whitespace. <br/> * phrase = optional whitespace, '"', optional whitespace, keyword, { * optional whitespace, keyword}, optional whitespace, '"'; * </p> */ Rule phrase() { Action stackPhraseRewriteAction = new StackPhraseRewriteAction(); // only grab leading spaces return Sequence( optionalWhiteSpace(), doubleQuoteRule, optionalWhiteSpace(), push( new PhraseDelimiterASTNode() ), keyword(), ZeroOrMore( Sequence( optionalWhiteSpace(), keyword() ) ), stackPhraseRewriteAction, optionalWhiteSpace(), doubleQuoteRule ); } /** * Original Keyword Query Specification EBNF excerpt <br/> * <group> ::= '('<keyword-query-expression>')'; * <p> * The implementation was changed to allow whitespace. <br/> * group = optional whitespace, '(', optional whitespace, keyword query * expression, optional whitespace, ')'; * </p> */ Rule group() { // only grab leading spaces return Sequence( optionalWhiteSpace(), leftParenRule, optionalWhiteSpace(), keywordQueryExpression(), optionalWhiteSpace(), rightParenRule ); } /** * Original Keyword Query Specification excerpt <br/> * "A keyword is a single string (containing no whitespaces) such as "test * " or "hello"." * <p> * All characters except: EOI, whitespace, (, ), ". <br/> * </p> */ Rule keyword() { return Sequence( OneOrMore( NoneOf( " \t\n\f()\"" ) ), push( new KeywordASTNode( matchOrDefault( "*" ) ) ) ); } // Previously, all strings were wrapped in Spacing() by this function @SuppressNode Rule terminal( String t ) { return String( t ).label( '\'' + t + '\'' ); } /** * This was added to allow whitespace. optional whitespace = {' '}; */ @SuppressNode Rule optionalWhiteSpace() { return ZeroOrMore( AnyOf( " \t\r\n\f" ).label( "Optional Whitespace" ) ); } /** * This was added to allow whitespace. mandatory whitespace = ' ', optional * whitespace; */ Rule mandatoryWhiteSpace() { return OneOrMore( AnyOf( " \t\r\n\f" ).label( "Mandatory Whitespace" ) ); } public class StackPhraseRewriteAction implements Action { // pop all keywords off of the stack and combine them with quotes and // push them back @Override public boolean run( Context context ) { StringBuilder keywords = new StringBuilder( "" ); // loop through the stack until it's empty or we hit a non-keyword while ( !isStackEmpty() && !peek().isPhraseStartDelimiter() ) { // restore the original order since popping them off the top // reverses the order keywords.insert( 0, pop().getKeyword() ); keywords.insert( 0, SPACE_STRING ); } // make sure we clear the phrase start marker from the stack if ( peek().isPhraseStartDelimiter() ) { drop(); } // push the keywords minus the leading space back onto the stack as // a single keyword push( new KeywordASTNode( keywords.toString().substring( 1 ) ) ); return true; } // there doesn't appear to be a better way to do this in parboiled // without accessing the // stack directly private boolean isStackEmpty() { try { peek(); } catch ( IllegalArgumentException iae ) { return true; } return false; } } }