package aima.core.nlp.parsing.grammars;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Represents the most general grammatical formalism,
* the Unrestricted (or Recursively Enumerable) Grammar.
* All other grammars can derive from this grammar, imposing extra
* restrictions.
* @author Jonathon
*
*/
public class ProbUnrestrictedGrammar implements ProbabilisticGrammar {

    // types of grammars
    public static final int UNRESTRICTED = 0;
    public static final int CONTEXT_SENSITIVE = 1;
    public static final int CONTEXT_FREE = 2;
    public static final int REGULAR = 3;
    public static final int CNFGRAMMAR = 4;
    public static final int PROB_CONTEXT_FREE = 5;

    /**
     * Largest deviation from 1.0 that a probability total may show and still
     * be accepted. Rule probabilities are summed in floating point, so an
     * exact equality test would reject distributions that are only off by
     * rounding error.
     */
    private static final double PROBABILITY_TOLERANCE = 1e-6;

    /** Rules currently held by the grammar. */
    public List<Rule> rules;
    /** Sorted list of the distinct variable (all-uppercase) symbols seen in the rules. */
    public List<String> vars;
    /** Sorted list of the distinct terminal (all-lowercase) symbols seen in the rules. */
    public List<String> terminals;
    /** Grammar type; one of the type constants declared above. */
    public int type;

    /**
     * Default constructor. Creates an unrestricted grammar that has no rules.
     */
    public ProbUnrestrictedGrammar() {
        type = UNRESTRICTED;
        rules = new ArrayList<>();
        vars = new ArrayList<>();
        terminals = new ArrayList<>();
    }

    /**
     * Install a batch of rules at once, testing each in turn for validity,
     * and then testing the batch for probability validity.
     * <p>
     * On success the batch <em>replaces</em> the rules currently held by the
     * grammar, and the variable and terminal lists are rebuilt from it so they
     * never mention symbols of discarded rules. The grammar keeps its own copy
     * of the list, so later changes to {@code ruleList} do not affect it.
     * On failure the grammar is left untouched.
     *
     * @param ruleList the rules to install
     * @return true if rules are valid and incorporated into the grammar.
     *         false, otherwise (including when {@code ruleList} is null)
     */
    public boolean addRules( List<Rule> ruleList ) {
        if (ruleList == null) {
            return false;
        }
        for (Rule candidate : ruleList) {
            if (!validRule(candidate)) {
                return false;
            }
        }
        if (!validateRuleProbabilities(ruleList)) {
            return false;
        }
        // Defensive copy: sharing the caller's list would let outside code
        // mutate the grammar, and an immutable list would break addRule().
        this.rules = new ArrayList<>(ruleList);
        // Start from empty symbol lists; the previous rules are gone.
        this.vars = new ArrayList<>();
        this.terminals = new ArrayList<>();
        updateVarsAndTerminals();
        return true;
    }

    /**
     * Add a single rule to the grammar, testing it for structural validity.
     * The probability distribution is not re-validated here.
     *
     * @param rule the rule to add
     * @return true if rule is incorporated. false, otherwise
     */
    // TODO: More sophisticated probability distribution management
    public boolean addRule( Rule rule ) {
        if (!validRule(rule)) {
            return false;
        }
        rules.add(rule);
        updateVarsAndTerminals(rule);
        return true;
    }

    /**
     * For a set of rules, test whether each batch of rules with the same
     * LHS have their probabilities sum to 1.0 (within a small floating-point
     * tolerance).
     * <p>
     * The check depends only on {@code ruleList}, not on the symbols already
     * registered in the grammar, so it can be run before the rules are
     * installed. An empty list is trivially valid.
     *
     * @param ruleList the rules to check
     * @return true if the probabilities are valid. false, otherwise
     *         (including when {@code ruleList} is null, when a rule has no
     *         LHS, or when a single probability lies outside [0, 1])
     */
    public boolean validateRuleProbabilities( List<Rule> ruleList ) {
        if (ruleList == null) {
            return false;
        }
        // Probability mass accumulated so far for each distinct left-hand side.
        Map<List<String>, Double> totalByLhs = new HashMap<>();
        for (Rule rule : ruleList) {
            if (rule == null || rule.lhs == null || rule.lhs.isEmpty()) {
                return false;
            }
            double probability = rule.PROB;
            // Written as a negated range test so that NaN is rejected too.
            if (!(probability >= 0.0 && probability <= 1.0)) {
                return false;
            }
            totalByLhs.merge(rule.lhs, probability, Double::sum);
        }
        for (double total : totalByLhs.values()) {
            if (Math.abs(total - 1.0) > PROBABILITY_TOLERANCE) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test validity of the LHS and RHS of grammar rule.
     * In unrestricted grammar, the only invalid rule type is
     * a rule with a null or empty LHS.
     *
     * @param r ( a rule )
     * @return true, if rule has valid form. false, otherwise
     *         (including when {@code r} itself is null)
     */
    public boolean validRule( Rule r ) {
        return r != null && r.lhs != null && !r.lhs.isEmpty();
    }

    /**
     * Update the lists of variables and terminals with any grammar symbols
     * of the current rules that are not registered yet. If the grammar has
     * no rule list at all, both symbol lists are reset to empty.
     */
    public void updateVarsAndTerminals() {
        if (rules == null) {
            vars = new ArrayList<>();
            terminals = new ArrayList<>();
            return;
        }
        for (Rule r : rules) {
            collectSymbols(r);
        }
        // Sort once after all rules are processed instead of once per rule.
        sortSymbolLists();
    }

    /**
     * Update variable and terminal lists with a single rule's symbols,
     * if there are new symbols.
     *
     * @param r the rule whose symbols should be registered
     */
    public void updateVarsAndTerminals( Rule r ) {
        collectSymbols(r);
        sortSymbolLists();
    }

    /** Registers the symbols of both sides of a rule; the RHS is null for a null-rule. */
    private void collectSymbols( Rule r ) {
        if (r == null) {
            return;
        }
        registerSymbols(r.lhs);
        registerSymbols(r.rhs);
    }

    /** Adds each symbol not seen before to the variable or terminal list it belongs to. */
    private void registerSymbols( List<String> symbols ) {
        if (symbols == null) {
            return;
        }
        for (String symbol : symbols) {
            if (isVariable(symbol)) {
                if (!vars.contains(symbol)) {
                    vars.add(symbol);
                }
            }
            else if (isTerminal(symbol) && !terminals.contains(symbol)) {
                terminals.add(symbol);
            }
        }
    }

    /** Keeps both symbol lists in sorted order. */
    private void sortSymbolLists() {
        Collections.sort(vars);
        Collections.sort(terminals);
    }

    /**
     * Check if we have a variable, as they are uppercase strings.
     * A null or empty string is not a variable.
     *
     * @param s the symbol to test
     * @return true, if string is non-empty and every character is uppercase.
     *         false, otherwise
     */
    public static boolean isVariable(String s) {
        // Without the emptiness test "" would vacuously count as a variable.
        return s != null && !s.isEmpty() && s.chars().allMatch(Character::isUpperCase);
    }

    /**
     * Check if we have a terminal, as they are lowercase strings.
     * A null or empty string is not a terminal.
     *
     * @param s the symbol to test
     * @return true, if string is non-empty and every character is lowercase.
     *         false, otherwise
     */
    public static boolean isTerminal(String s) {
        // Without the emptiness test "" would vacuously count as a terminal.
        return s != null && !s.isEmpty() && s.chars().allMatch(Character::isLowerCase);
    }

    /**
     * Render the grammar as text: one line listing the variables, one line
     * listing the terminals, then one line per rule.
     */
    @Override
    public String toString() {
        StringBuilder output = new StringBuilder();

        output.append("Variables: ");
        this.vars.forEach(var -> output.append(var).append(", "));
        output.append('\n');

        output.append("Terminals: ");
        this.terminals.forEach(terminal -> output.append(terminal).append(", "));
        output.append('\n');

        this.rules.forEach(rule -> output.append(rule.toString()).append('\n'));
        return output.toString();
    }
}