/** Copyright (c) 2012 Delcyon, Inc. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package com.delcyon.capo.parsers; import java.io.InputStream; import java.util.HashMap; import java.util.Map.Entry; import java.util.Set; import java.util.Vector; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import com.delcyon.capo.parsers.Tokenizer.CharacterType; import com.delcyon.capo.xml.XPath; /** * @author jeremiah */ public class GrammarParser { public enum SymbolType { /** used to specify WHITESPACE in the tokenizer **/ DELIMITER, /** used to specify a regex replacement pattern to identify LITERALS and strip them of their indicator chars **/ LITERAL, ASSIGNMENT, ALTERNATION, DECLARATION, EOL, SYMBOL } private HashMap<SymbolType, String[]> symbolHashMap = new HashMap<SymbolType, String[]>(); //symbol types are only used in the setDelimter code private HashMap<String, SymbolType> symbolTypeHashMap = new HashMap<String, SymbolType>(); private Vector<ParseRule> notationParseRuleVector; //this is used to parse and understand a grammar. private Vector<ParseRule> grammerParseRuleVector; //this set of rules is used to parse input based on a grammar. private String prefix; private String uri; public GrammarParser() { symbolHashMap.put(SymbolType.DELIMITER, new String[] { " ", "\t", "EOL" }); symbolHashMap.put(SymbolType.LITERAL, new String[] { "'(.+)'" }); //symbolHashMap.put(SymbolType.LITERAL.toString(), new String[] { "\"(.+)\"", "'(.+)'" }); // symbolHashMap.put(SymbolType.ASSIGNMENT, new String[] { "=" }); // symbolHashMap.put(SymbolType.ALTERNATION, new String[] { "|" }); // symbolHashMap.put(SymbolType.EOL, new String[] { "\n" }); Set<Entry<SymbolType, String[]>> symbolEntrySet = symbolHashMap.entrySet(); for (Entry<SymbolType, String[]> entry : symbolEntrySet) { String[] symbols = entry.getValue(); for (String symbol : symbols) { symbolTypeHashMap.put(symbol, entry.getKey()); } } } private ParseTree loadDefaultNotationParseTree() { ParseTree parseTree = new ParseTree(); parseTree.setSymbolHashMap(symbolHashMap); ParseRule ruleListParseRule = new ParseRule("RULE_LIST",new String[]{"RULE+"}); parseTree.addRule(ruleListParseRule); ParseRule ruleParseRule = new ParseRule("RULE",new String[]{"RULE_NAME","'='", "EXPRESSION+","EOL"}); parseTree.addRule(ruleParseRule); ParseRule expressionParseRule = new ParseRule("EXPRESSION",new String[]{"TERM+"},new String[]{"'|'", "TERM+"}); parseTree.addRule(expressionParseRule); ParseRule termParseRule = new ParseRule("TERM",new String[]{"VALUE"}); parseTree.addRule(termParseRule); return parseTree; } public void loadNotationGrammer(InputStream inputStream) throws Exception { //prepare symbol table with loaded symbols Tokenizer streamTokenizer = new Tokenizer(inputStream); streamTokenizer.resetSyntax(); streamTokenizer.setCharRangeType(33, 126,CharacterType.ALPHA); streamTokenizer.setCharType('"', CharacterType.QUOTE); streamTokenizer.setCharType('\\', CharacterType.ESCAPE); streamTokenizer.setCharType('\n', CharacterType.EOL); streamTokenizer.setCharType('\r', CharacterType.EOL); setDelimiters(streamTokenizer, SymbolType.DELIMITER); ParseTree notationParseTree = loadDefaultNotationParseTree(); notationParseTree.setSymbolHashMap(symbolHashMap); notationParseTree.setUseLiteralsAsTokens(false); //notationParseTree.setSymbolTypeHashMap(symbolTypeHashMap); Document parseDocument = notationParseTree.parse(streamTokenizer); //XPath.dumpNode(parseDocument, System.out); notationParseRuleVector = getParseRules(parseDocument); } public void loadGrammer(InputStream inputStream) throws Exception { //prepare symbol table with loaded symbols Tokenizer streamTokenizer = new Tokenizer(inputStream); streamTokenizer.resetSyntax(); streamTokenizer.setCharRangeType(33, 126,CharacterType.ALPHA); streamTokenizer.setCharType('"', CharacterType.QUOTE); streamTokenizer.setCharType('\\', CharacterType.ESCAPE); streamTokenizer.setCharType('\n', CharacterType.EOL); streamTokenizer.setCharType('\r', CharacterType.EOL); setDelimiters(streamTokenizer, SymbolType.DELIMITER); ParseTree grammerParseTree = null; if(notationParseRuleVector != null) { grammerParseTree = new ParseTree(); for (ParseRule parseRule : notationParseRuleVector) { grammerParseTree.addRule(parseRule); } } else { grammerParseTree = loadDefaultNotationParseTree(); } grammerParseTree.setSymbolHashMap(symbolHashMap); grammerParseTree.setUseLiteralsAsTokens(false); //grammerParseTree.setSymbolTypeHashMap(symbolTypeHashMap); Document parseDocument = grammerParseTree.parse(streamTokenizer); //XPath.dumpNode(parseDocument, System.out); grammerParseRuleVector = getParseRules(parseDocument); } public Document parse(InputStream inputStream) throws Exception { //prepare symbol table with loaded symbols Tokenizer streamTokenizer = new Tokenizer(inputStream); streamTokenizer.resetSyntax(); streamTokenizer.setCharRangeType(33, 126,CharacterType.ALPHA); streamTokenizer.setCharType('\n', CharacterType.EOL); streamTokenizer.setCharType('\r', CharacterType.EOL); //streamTokenizer.setCharType('"', CharacterType.QUOTE); //streamTokenizer.quoteChar('\''); setDelimiters(streamTokenizer, SymbolType.DELIMITER); ParseTree inputParseTree = new ParseTree(); inputParseTree.setNamespace(prefix,uri); inputParseTree.setAllowPartialMatch(true); inputParseTree.setSymbolHashMap(symbolHashMap); inputParseTree.setUseLiteralsAsTokens(true); for (ParseRule parseRule : grammerParseRuleVector) { inputParseTree.addRule(parseRule); } Document parseDocument = inputParseTree.parse(streamTokenizer); // XPath.dumpNode(parseDocument, System.out); return parseDocument; } private void setDelimiters(Tokenizer streamTokenizer, SymbolType symbolName) { String[] delimiters = symbolHashMap.get(symbolName); if (delimiters == null) { return; } for (String string : delimiters) { if (string.length() == 1) { streamTokenizer.setCharType(string.charAt(0), CharacterType.WHITESPACE); } else if (string.length() > 1) { setDelimiters(streamTokenizer, SymbolType.valueOf(string)); } } } /** * This returns a set of rules that represent a grammar. * @param ruleDocument * @return * @throws Exception */ private Vector<ParseRule> getParseRules(Document ruleDocument) throws Exception { Vector<ParseRule> parseRuleVector = new Vector<ParseRule>(); NodeList ruleList = XPath.selectNodes(ruleDocument, "//RULE"); for(int ruleIndex = 0; ruleIndex < ruleList.getLength(); ruleIndex++) { Element ruleElement = (Element) ruleList.item(ruleIndex); NodeList expressionNodeList = XPath.selectNodes(ruleElement, "EXPRESSION"); Vector<Vector<String>> expressionsVector = new Vector<Vector<String>>(); for(int expressionIndex = 0; expressionIndex < expressionNodeList.getLength(); expressionIndex++) { Vector<String> expressionVector = new Vector<String>(); NodeList termNodeList = XPath.selectNodes(expressionNodeList.item(expressionIndex), "TERM"); for(int termIndex = 0; termIndex < termNodeList.getLength(); termIndex++) { String value = ((Element) termNodeList.item(termIndex)).getAttribute("VALUE"); if(symbolTypeHashMap.get(value) == SymbolType.ALTERNATION) { expressionsVector.add(expressionVector); expressionVector = new Vector<String>(); //System.err.println(symbolTypeHashMap.get(value)+"<---"+value); } else { //System.err.println(symbolTypeHashMap.get(value)+"<==="+value); expressionVector.add(value); } } expressionsVector.add(expressionVector); } String[][] expressions = new String[expressionsVector.size()][]; for(int expressionsIndex = 0 ; expressionsIndex < expressionsVector.size(); expressionsIndex++) { Vector<String> expressionVectorLocal = expressionsVector.get(expressionsIndex); expressions[expressionsIndex] = new String[expressionVectorLocal.size()]; for(int termIndex = 0; termIndex < expressions[expressionsIndex].length; termIndex++) { expressions[expressionsIndex][termIndex] = expressionVectorLocal.get(termIndex); } } ParseRule parseRule = new ParseRule(ruleElement.getAttribute("RULE_NAME"),expressions); if(parseRule.getName().equals("ALTERNATION")) { System.out.println(parseRule.getName()+"==>"+expressionsVector); } else if(parseRule.getName().equals("ASSIGNMENT")) { System.out.println(parseRule.getName()+"==>"+expressionsVector); } else { parseRuleVector.add(parseRule); //System.out.println(parseRule); } } return parseRuleVector; } public void setNamespace(String prefix, String uri) { this.prefix = prefix; this.uri = uri; } }