/* Copyright 2009-2016 David Hadka
 *
 * This file is part of the MOEA Framework.
 *
 * The MOEA Framework is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or (at your
 * option) any later version.
 *
 * The MOEA Framework is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the MOEA Framework.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.moeaframework.util.grammar;

import java.io.IOException;
import java.io.Reader;
import java.io.StreamTokenizer;

/**
 * Parses simple context-free grammars in Backus-Naur form (BNF).  The
 * following example demonstrates the accepted syntax.  Newlines indicate the
 * end of a rule; single and double quotes can be used to escape the control
 * characters (":", "=", "|", "//", etc.); C and C++ style comments are
 * supported.
 *
 * <pre>
 * {@code
 * <expr> ::= <expr> <op> <expr> | "func(" <expr> ")" | <val>
 * <op> ::= + | - | * | '/'
 * <val> ::= x | y | z
 * }
 * </pre>
 *
 * A token is classified as a non-terminal purely by its text: anything that
 * starts with {@code <} and ends with {@code >} is a non-terminal, including
 * quoted tokens of that form.
 */
public class Parser {

    /**
     * Private constructor to prevent instantiation.
     */
    private Parser() {
        super();
    }

    /**
     * Parses the context-free grammar.
     *
     * @param reader the {@link Reader} containing the BNF context-free grammar
     * @return the grammar
     * @throws IOException if an I/O error occurred
     * @throws GrammarException if an error occurred parsing the BNF
     *         context-free grammar; the reported line number is the line on
     *         which the offending rule or separator appears
     */
    public static ContextFreeGrammar load(Reader reader) throws IOException {
        StreamTokenizer tokenizer = createTokenizer(reader);
        ContextFreeGrammar grammar = new ContextFreeGrammar();

        // The rule currently being defined (null between rules) and the
        // production currently being filled (null right after the rule's
        // left-hand side or after a '|').
        Rule rule = null;
        Production production = null;

        while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
            if (isSeparatorChar(tokenizer.ttype)) {
                // Remember where the separator starts: the look-ahead below
                // may consume an end-of-line, which advances lineno().
                int separatorLine = tokenizer.lineno();

                // Swallow the whole run of ':' and '=' characters (e.g. "::=").
                do {
                    tokenizer.nextToken();
                } while (isSeparatorChar(tokenizer.ttype));

                if ((rule == null) || (production != null)) {
                    throw new GrammarException("unexpected rule separator",
                            separatorLine);
                }

                // The loop above read one token too far; hand it back.
                tokenizer.pushBack();
            } else if (tokenizer.ttype == '|') {
                if (isMissingProduction(rule, production)) {
                    throw new GrammarException(
                            "rule must contain at least one production",
                            tokenizer.lineno());
                }

                production = null;
            } else if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
                if (isMissingProduction(rule, production)) {
                    // StreamTokenizer bumps its line counter before handing
                    // back TT_EOL, so the rule that just ended is on the
                    // previous line.
                    throw new GrammarException(
                            "rule must contain at least one production",
                            tokenizer.lineno() - 1);
                }

                rule = null;
                production = null;
            } else {
                String text = tokenText(tokenizer);
                boolean terminal = !(text.startsWith("<") && text.endsWith(">"));

                if (!terminal) {
                    // Strip the surrounding angle brackets.
                    text = text.substring(1, text.length() - 1);

                    if (text.isEmpty()) {
                        throw new GrammarException("invalid symbol",
                                tokenizer.lineno());
                    }
                }

                if (rule == null) {
                    // First symbol on a line names the rule being defined.
                    if (terminal) {
                        throw new GrammarException(
                                "rule must start with non-terminal",
                                tokenizer.lineno());
                    }

                    rule = new Rule(new Symbol(text, false));
                    grammar.add(rule);
                } else if (production == null) {
                    production = new Production();
                    production.add(new Symbol(text, terminal));
                    rule.add(production);
                } else {
                    production.add(new Symbol(text, terminal));
                }
            }
        }

        // Input ended without a trailing newline; no TT_EOL was consumed, so
        // lineno() still refers to the line holding the unfinished rule.
        if (isMissingProduction(rule, production)) {
            throw new GrammarException(
                    "rule must contain at least one production",
                    tokenizer.lineno());
        }

        return grammar;
    }

    /**
     * Creates a tokenizer configured for the BNF syntax accepted by
     * {@link #load(Reader)}.
     *
     * @param reader the source of the grammar text
     * @return the configured tokenizer
     */
    private static StreamTokenizer createTokenizer(Reader reader) {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);

        // Start from a blank table so that only the characters listed below
        // have special meaning; this also disables number parsing.
        tokenizer.resetSyntax();
        tokenizer.wordChars('a', 'z');
        tokenizer.wordChars('A', 'Z');
        tokenizer.wordChars('0', '9');
        tokenizer.wordChars('<', '<');
        tokenizer.wordChars('>', '>');
        tokenizer.wordChars('_', '_');
        tokenizer.wordChars('-', '-');
        tokenizer.wordChars('.', '.');
        tokenizer.wordChars(128 + 32, 255);
        tokenizer.whitespaceChars(0, ' ');
        tokenizer.quoteChar('"');
        tokenizer.quoteChar('\'');
        tokenizer.eolIsSignificant(true);
        tokenizer.slashSlashComments(true);
        tokenizer.slashStarComments(true);

        return tokenizer;
    }

    /**
     * Returns the text of the tokenizer's current token.  Words and quoted
     * strings yield their string value; any other token is an ordinary
     * character and yields that single character.  {@code TT_NUMBER} is not
     * handled because the tokenizer built by {@link #createTokenizer(Reader)}
     * never enables number parsing after {@code resetSyntax()}.
     *
     * @param tokenizer the tokenizer positioned on a symbol token
     * @return the token text
     */
    private static String tokenText(StreamTokenizer tokenizer) {
        int type = tokenizer.ttype;

        if ((type == StreamTokenizer.TT_WORD) || (type == '\'')
                || (type == '"')) {
            return tokenizer.sval;
        }

        return Character.toString((char)type);
    }

    /**
     * Returns {@code true} if the token type is one of the characters that
     * make up a rule separator such as {@code ::=}.
     *
     * @param type the token type
     * @return {@code true} for ':' or '='; {@code false} otherwise
     */
    private static boolean isSeparatorChar(int type) {
        return (type == ':') || (type == '=');
    }

    /**
     * Returns {@code true} if a rule is open but its current alternative has
     * not received any symbol yet.
     *
     * @param rule the rule being defined, or {@code null} if none
     * @param production the production being filled, or {@code null} if none
     * @return {@code true} if a production is expected but absent
     */
    private static boolean isMissingProduction(Rule rule,
            Production production) {
        return (rule != null) && (production == null);
    }

}