/** * Copyright (c) Codice Foundation * <p> * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser * General Public License as published by the Free Software Foundation, either version 3 of the * License, or any later version. * <p> * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. A copy of the GNU Lesser General Public License * is distributed along with this program and can be found at * <http://www.gnu.org/licenses/lgpl.html>. */ package org.codice.ddf.endpoints; import org.parboiled.Action; import org.parboiled.BaseParser; import org.parboiled.Context; import org.parboiled.Rule; import org.parboiled.annotations.BuildParseTree; import org.parboiled.annotations.SuppressNode; import org.parboiled.support.StringVar; /** * This parser is based on a modified version of the * "IC/DoD keyword Query Language Specification, V2.0" DRAFT (4 September 2012). This spec includes * an EBNF that this parser is based on. All changes to that EBNF were made to add variable * whitespace handling and to make parsing more efficient. */ @SuppressWarnings({"InfiniteRecursion"}) @BuildParseTree public class KeywordTextParser extends BaseParser<ASTNode> { protected static final String OR_STRING = "OR"; protected static final String AND_STRING = "AND"; protected static final String NOT_STRING = "NOT"; protected static final String SPACE_STRING = " "; protected final Rule orOperator = terminal(OR_STRING); protected final Rule andOperator = terminal(AND_STRING); protected final Rule notOperator = terminal(NOT_STRING); protected final Rule lpar = terminal("("); protected final Rule rpar = terminal(")"); protected final Rule dblquote = terminal("\""); protected final Rule spaceRule = terminal(SPACE_STRING); // This method exists to detect end of input. public Rule inputPhrase() { return Sequence(keywordQueryExpression(), EOI); } /** * Original keyword Query Specification EBNF excerpt <br/> * <keyword-query-expression> ::= <term> (<boolean-operator> <term>)*; * <p> * The implementation was changed to allow whitespace. <br/> * keyword query expression = optional whitespace, term, {boolean operator, term}, optional * whitespace; * </p> */ Rule keywordQueryExpression() { StringVar operator = new StringVar(); return Sequence(optionalWhiteSpace(), term(), ZeroOrMore(booleanOperator(), operator.set(match()), term(), push(new OperatorASTNode(operator.get(), pop(1), pop()))), optionalWhiteSpace()); } /** * Original keyword Query Specification EBNF excerpt <br/> * <boolean-operator> ::= <and> | <or> | <not>; * <p> * The implementation was changed to evaluate OR and NOT first, so all spaces aren't evaluated * as ANDs. <br/> * boolean operator = or | not | and; * </p> */ Rule booleanOperator() { return FirstOf(or(), not(), and()); } /** * Original keyword Query Specification EBNF excerpt <br/> * <and> ::= “ AND ” | “ ”; * <p> * The implementation was changed to allow whitespace and to not require boolean operators to be * wrapped in spaces. <br/> * and = (optional whitespace, "AND", optional whitespace) | mandatory whitespace; * </p> */ Rule and() { return FirstOf(Sequence(optionalWhiteSpace(), andOperator, optionalWhiteSpace()), mandatoryWhiteSpace()); } /** * Original keyword Query Specification EBNF excerpt <br/> * <or> ::= “ OR ”; * <p> * The implementation was changed to allow whitespace and to not require boolean operators to be * wrapped in spaces. <br/> * or = (optional whitespace, "OR", optional whitespace); * </p> */ Rule or() { return Sequence(optionalWhiteSpace(), orOperator, optionalWhiteSpace()); } /** * Original keyword Query Specification EBNF excerpt <br/> * <not> ::= “ NOT ”; * <p> * The implementation was changed to allow whitespace and to not require boolean operators to be * wrapped in spaces. <br/> * not = (optional whitespace, "NOT", optional whitespace); * </p> */ Rule not() { return Sequence(optionalWhiteSpace(), notOperator, optionalWhiteSpace()); } /** * Original keyword Query Specification EBNF excerpt <br/> * <term> ::= <keyword> | <phrase> | <group>; * <p> * The implementation was changed to evaluate the most specific rule first. <br/> * term = group | phrase | keyword; * </p> */ Rule term() { return FirstOf(group(), phrase(), keyword()); } /** * Original keyword Query Specification EBNF excerpt <br/> * <phrase> ::= '"' <keyword> (' '<keyword>)* '"'; * <p> * The implementation was changed to allow whitespace. <br/> * phrase = optional whitespace, '"', optional whitespace, keyword, { optional whitespace, * keyword}, optional whitespace, '"'; * </p> */ Rule phrase() { Action stackPhraseRewriteAction = new StackPhraseRewriteAction(); // only grab leading spaces return Sequence(optionalWhiteSpace(), dblquote, optionalWhiteSpace(), push(new PhraseDelimiterASTNode()), keyword(), ZeroOrMore(Sequence(optionalWhiteSpace(), keyword())), stackPhraseRewriteAction, optionalWhiteSpace(), dblquote); } /** * Original keyword Query Specification EBNF excerpt <br/> * <group> ::= '('<keyword-query-expression>')'; * <p> * The implementation was changed to allow whitespace. <br/> * group = optional whitespace, '(', optional whitespace, keyword query expression, optional * whitespace, ')'; * </p> */ Rule group() { // only grab leading spaces return Sequence(optionalWhiteSpace(), lpar, optionalWhiteSpace(), keywordQueryExpression(), optionalWhiteSpace(), rpar); } /** * Original keyword Query Specification excerpt <br/> * "A keyword is a single string (containing no whitespaces) such as "test" or "hello"." * <p> * All characters except: EOI, whitespace, (, ), ". <br/> * </p> */ Rule keyword() { // TODO the default value is used to allow the parser to keep running during error // recovery... is this right? return Sequence(OneOrMore(NoneOf(" \t\n\f()\"")), push(new KeywordASTNode(matchOrDefault("defaultKeyword")))); } // Previously, all strings were wrapped in Spacing() by this function @SuppressNode Rule terminal(String t) { return String(t).label('\'' + t + '\''); } /** * This was added to allow whitespace. optional whitespace = {' '}; */ @SuppressNode Rule optionalWhiteSpace() { return ZeroOrMore(AnyOf(" \t\r\n\f").label("Optional Whitespace")); } /** * This was added to allow whitespace. mandatory whitespace = ' ', optional whitespace; */ Rule mandatoryWhiteSpace() { return OneOrMore(AnyOf(" \t\r\n\f").label("Mandatory Whitespace")); } public class StackPhraseRewriteAction implements Action { // pop all keywords off of the stack and combine them with quotes and push them back @Override public boolean run(Context context) { StringBuilder keywords = new StringBuilder(""); // loop through the stack until it's empty or we hit a non-keyword while (!isStackEmpty() && !peek().isPhraseStartDelimiter()) { // restore the original order since popping them off the top reverses the order keywords.insert(0, pop().getKeyword()); keywords.insert(0, SPACE_STRING); } // make sure we clear the phrase start marker from the stack if (peek().isPhraseStartDelimiter()) { drop(); } // push the keywords minus the leading space back onto the stack as a single keyword push(new KeywordASTNode(keywords.toString() .substring(1))); return true; } // there doesn't appear to be a better way to do this in parboiled without accessing the // stack directly private boolean isStackEmpty() { try { peek(); } catch (IllegalArgumentException iae) { return true; } return false; } } }