// This file is part of AceWiki.
// Copyright 2008-2013, AceWiki developers.
//
// AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
// not, see http://www.gnu.org/licenses/.
package ch.uzh.ifi.attempto.chartparser;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * This class represents a grammar that is needed to run the chart parser. A grammar can be created
 * either directly in Java or on the basis of a file in the Codeco notation. See the package
 * description of {@link ch.uzh.ifi.attempto.codeco} for more information about the Codeco notation.
 * <p>
 * Besides the plain rule lists, the grammar maintains lookup indexes (rules by head name, lexical
 * rules by category name and by word) and the sets of symbol and feature names seen so far. All
 * public getters hand out copies, so callers cannot modify the internal state of the grammar.
 *
 * @author Tobias Kuhn
 */
public class Grammar {

    // All rules in insertion order:
    private final List<GrammarRule> rules = new ArrayList<GrammarRule>();
    private final List<LexicalRule> lexRules = new ArrayList<LexicalRule>();

    // Lookup indexes; the lists are created lazily on first access:
    private final Map<String, List<GrammarRule>> rulesByHeadName = new HashMap<String, List<GrammarRule>>();
    private final Map<String, List<LexicalRule>> lexRulesByCat = new HashMap<String, List<LexicalRule>>();
    private final Map<String, List<LexicalRule>> lexRulesByWord = new HashMap<String, List<LexicalRule>>();

    // Names collected from every category that has been added (sorted sets):
    private final Set<String> terminalSymbols = new TreeSet<String>();
    private final Set<String> preterminalSymbols = new TreeSet<String>();
    private final Set<String> nonterminalSymbols = new TreeSet<String>();
    private final Set<String> featureNames = new TreeSet<String>();

    /**
     * Creates an empty grammar.
     */
    public Grammar() {
    }

    /**
     * Adds a grammar rule. The rule is indexed under the name of its head category, and the head
     * and all body categories are registered in the symbol and feature name sets.
     *
     * @param rule The grammar rule to be added.
     */
    public void addGrammarRule(GrammarRule rule) {
        rules.add(rule);
        rulesByHeadName(rule.getHead().getName()).add(rule);
        processCategory(rule.getHead());
        for (Category c : rule.getBody()) {
            processCategory(c);
        }
    }

    /**
     * Adds a lexical rule. Lexical rules could also be called "lexicon entries". The rule is
     * indexed under both its category name and its word, and both categories are registered in
     * the symbol and feature name sets.
     *
     * @param lexRule The lexical rule to be added.
     */
    public void addLexicalRule(LexicalRule lexRule) {
        lexRules.add(lexRule);
        lexRulesByCat(lexRule.getCategory().getName()).add(lexRule);
        lexRulesByWord(lexRule.getWord().getName()).add(lexRule);
        processCategory(lexRule.getWord());
        processCategory(lexRule.getCategory());
    }

    /**
     * Returns the internal list of grammar rules with a head category of the specified name. If
     * no list exists yet for this name, an empty one is created and stored.
     *
     * @param name The name of the head category.
     * @return The internal list of grammar rules.
     */
    List<GrammarRule> rulesByHeadName(String name) {
        return getOrCreateList(rulesByHeadName, name);
    }

    /**
     * Returns the grammar rules with a head category of the specified name. The returned list is
     * a copy; modifying it does not affect the grammar.
     *
     * @param name The name of the head category.
     * @return A list of grammar rules.
     */
    public List<GrammarRule> getRulesByHeadName(String name) {
        // Defensive copy, like the other public getters: handing out the internal index list
        // would let callers corrupt the index.
        return new ArrayList<GrammarRule>(rulesByHeadName(name));
    }

    /**
     * Returns the internal list of lexical rules with a pre-terminal category of the specified
     * name. If no list exists yet for this name, an empty one is created and stored.
     *
     * @param categoryName The name of the pre-terminal category.
     * @return The internal list of lexical rules.
     */
    List<LexicalRule> lexRulesByCat(String categoryName) {
        return getOrCreateList(lexRulesByCat, categoryName);
    }

    /**
     * Returns the list of lexical rules with a pre-terminal category of the specified name. The
     * returned list is a copy; modifying it does not affect the grammar.
     *
     * @param categoryName The name of the pre-terminal category.
     * @return A list of lexical rules.
     */
    public List<LexicalRule> getLexicalRulesByCategory(String categoryName) {
        return new ArrayList<LexicalRule>(lexRulesByCat(categoryName));
    }

    /**
     * Returns the internal list of lexical rules for the specified word. The word corresponds to a
     * terminal category. If no list exists yet for this word, an empty one is created and stored.
     *
     * @param word The word.
     * @return The internal list of lexical rules.
     */
    List<LexicalRule> lexRulesByWord(String word) {
        return getOrCreateList(lexRulesByWord, word);
    }

    /**
     * Returns the list of lexical rules for the specified word. The word corresponds to a terminal
     * category. The returned list is a copy; modifying it does not affect the grammar.
     *
     * @param word The word.
     * @return A list of lexical rules.
     */
    public List<LexicalRule> getLexicalRulesByWord(String word) {
        return new ArrayList<LexicalRule>(lexRulesByWord(word));
    }

    /**
     * Returns an array of all names of features used in feature structures of categories contained
     * in this grammar. The array contains no duplicates and the elements are sorted alphabetically.
     *
     * @return An array of all used feature names in alphabetical order.
     */
    String[] getFeatureNamesArray() {
        return featureNames.toArray(new String[0]);
    }

    /**
     * Returns a set of all names of features used in feature structures of categories contained in
     * this grammar. The returned set is a copy.
     *
     * @return A set of all used feature names.
     */
    public Set<String> getFeatureNames() {
        return new TreeSet<String>(featureNames);
    }

    /**
     * Returns whether the given feature name is used in this grammar.
     *
     * @param featureName The feature name.
     * @return true if the feature name is used.
     */
    public boolean containsFeatureName(String featureName) {
        return featureNames.contains(featureName);
    }

    /**
     * Returns a set of all terminal symbols used in this grammar. The returned set is a copy.
     *
     * @return A set of all terminal symbols.
     */
    public Set<String> getTerminalSymbols() {
        return new TreeSet<String>(terminalSymbols);
    }

    /**
     * Returns whether the given terminal symbol is used in this grammar.
     *
     * @param terminalSymbol The terminal symbol.
     * @return true if the symbol is used.
     */
    public boolean containsTerminalSymbol(String terminalSymbol) {
        return terminalSymbols.contains(terminalSymbol);
    }

    /**
     * Returns a set of all preterminal symbols used in this grammar. The returned set is a copy.
     *
     * @return A set of all preterminal symbols.
     */
    public Set<String> getPreterminalSymbols() {
        return new TreeSet<String>(preterminalSymbols);
    }

    /**
     * Returns whether the given preterminal symbol is used in this grammar.
     *
     * @param preterminalSymbol The preterminal symbol.
     * @return true if the symbol is used.
     */
    public boolean containsPreterminalSymbol(String preterminalSymbol) {
        return preterminalSymbols.contains(preterminalSymbol);
    }

    /**
     * Returns a set of all nonterminal symbols used in this grammar. The returned set is a copy.
     *
     * @return A set of all nonterminal symbols.
     */
    public Set<String> getNonterminalSymbols() {
        return new TreeSet<String>(nonterminalSymbols);
    }

    /**
     * Returns whether the given nonterminal symbol is used in this grammar.
     *
     * @param nonterminalSymbol The nonterminal symbol.
     * @return true if the symbol is used.
     */
    public boolean containsNonterminalSymbol(String nonterminalSymbol) {
        return nonterminalSymbols.contains(nonterminalSymbol);
    }

    /**
     * This is an auxiliary method for grammar classes that are automatically generated out of a
     * Codeco representation. It sets a feature of a feature map to a certain unbound variable.
     * The same variable identifier always maps to the same string reference object within one
     * feature hash.
     *
     * @param fm The feature map for which a feature should be set.
     * @param featureName The name of the feature to be set.
     * @param varID The identifier of the unbound variable to which the feature should be set.
     * @param featureHash A hash map with variable identifiers as keys and the string reference
     *     objects that represent the respective variables as values.
     */
    protected static void setFeature(FeatureMap fm, String featureName, int varID,
            HashMap<Integer, StringRef> featureHash) {
        fm.setFeature(featureName, getStringRef(varID, featureHash));
    }

    /**
     * This is an auxiliary method for grammar classes that are automatically generated out of a
     * Codeco representation. It returns a string reference object that represents a certain
     * unbound variable. If the feature hash has no entry for the variable yet, a new string
     * reference object is created and stored.
     *
     * @param varID The identifier of the unbound variable for which a string reference object
     *     should be returned.
     * @param featureHash A hash map with variable identifiers as keys and the string reference
     *     objects that represent the respective variables as values.
     * @return A string reference object.
     */
    protected static StringRef getStringRef(int varID, HashMap<Integer, StringRef> featureHash) {
        StringRef stringRef = featureHash.get(varID);
        if (stringRef == null) {
            stringRef = new StringRef();
            featureHash.put(varID, stringRef);
        }
        return stringRef;
    }

    /**
     * Returns the list stored in the given index under the given key. If there is no such list
     * yet, an empty one is created, stored in the index, and returned.
     *
     * @param index The lookup index.
     * @param key The key to look up.
     * @return The (possibly newly created) internal list for the key.
     */
    private static <T> List<T> getOrCreateList(Map<String, List<T>> index, String key) {
        List<T> l = index.get(key);
        if (l == null) {
            l = new ArrayList<T>();
            index.put(key, l);
        }
        return l;
    }

    /**
     * Registers the name of the given category in the matching symbol set (terminal, preterminal,
     * or nonterminal) and collects the feature names it reports.
     *
     * @param c The category to be registered.
     */
    private void processCategory(Category c) {
        if (c instanceof Terminal) {
            terminalSymbols.add(c.getName());
        } else if (c instanceof Preterminal) {
            preterminalSymbols.add(c.getName());
        } else if (c instanceof Nonterminal) {
            nonterminalSymbols.add(c.getName());
        }
        // Categories of any other kind contribute feature names only.
        Set<String> fnames = c.getFeatureNames();
        if (fnames != null) {
            featureNames.addAll(fnames);
        }
    }

    /**
     * Returns a textual representation of this grammar: one grammar rule per line, followed by an
     * empty line, followed by one lexical rule per line.
     *
     * @return The string representation of this grammar.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (GrammarRule r : rules) {
            sb.append(r).append("\n");
        }
        sb.append("\n");
        for (LexicalRule le : lexRules) {
            sb.append(le).append("\n");
        }
        return sb.toString();
    }

}