/*
*
* Copyright 2012 lexergen.
* This file is part of lexergen.
*
* lexergen is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* lexergen is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with lexergen. If not, see <http://www.gnu.org/licenses/>.
*
* lexergen:
* A tool to chunk source code into tokens for further processing in a compiler chain.
*
* Projectgroup: bi, bii
*
* Authors: Johannes Dahlke
*
* Module: Softwareprojekt Übersetzerbau 2012
*
* Created: Apr. 2012
* Version: 1.0
*
*/
package de.fuberlin.bii.regextodfaconverter.directconverter.lrparser.grammar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import de.fuberlin.bii.utils.Sets;
import de.fuberlin.bii.utils.Test;
/**
* Datenstruktur zum Abbilden einer kontextfreien Grammatik.
*
* @author Johannes Dahlke
*
*/
@SuppressWarnings("rawtypes")
public class ContextFreeGrammar extends ProductionMap implements Grammar {
/**
*
*/
private static final long serialVersionUID = 5244247250895811777L;
private Nonterminal startSymbol = null;
private Set<Nonterminal> nonterminals = new HashSet<Nonterminal>();
private TerminalSet terminals = new TerminalSet();
public Nonterminal getStartSymbol() {
return startSymbol;
}
public void setStartSymbol( Nonterminal startSymbol) {
this.startSymbol = startSymbol;
}
@Override
public boolean addProduction(ProductionRule productionRule) {
if (Test.isUnassigned(productionRule))
return false;
Nonterminal leftRuleSide = productionRule.getLeftRuleSide();
if (Test.isUnassigned(startSymbol))
startSymbol = leftRuleSide;
terminals.addAll(productionRule.getTerminalSet());
nonterminals.addAll(productionRule.getNonterminalSet());
RuleElementSequenz rightRuleSide = productionRule.getRightRuleSide();
if (this.containsKey(leftRuleSide)) {
this.get(leftRuleSide).add(rightRuleSide);
} else {
HashSet<RuleElementSequenz> ruleSet = new HashSet<RuleElementSequenz>();
ruleSet.add(rightRuleSide);
this.put(leftRuleSide, ruleSet);
}
return true;
}
public Set<ProductionRule> getProductions() {
Set<ProductionRule> result = new HashSet<ProductionRule>();
for (Nonterminal nonterminal : this.keySet()) {
for (RuleElementSequenz ruleElementSequenz : this.get(nonterminal)) {
result.add(new ProductionRule(nonterminal, ruleElementSequenz));
}
}
return result;
}
private TerminalSet getFirstSetOfRuleElement(RuleElement ruleElement, HashMap<Nonterminal, TerminalSet> growingFirstSetTable) {
TerminalSet firstSet = new TerminalSet();
if (ruleElement instanceof EmptyString) {
firstSet.add((EmptyString) ruleElement);
return firstSet;
}
if (ruleElement instanceof Terminal) {
firstSet.add((Terminal) ruleElement);
return firstSet;
}
// otherwise ruleElement is an instance of Nonterminal
assert ruleElement instanceof Nonterminal;
Nonterminal nonterminal = (Nonterminal) ruleElement;
if (Test.isUnassigned(growingFirstSetTable))
growingFirstSetTable = new HashMap<Nonterminal, TerminalSet>();
if (growingFirstSetTable.containsKey(nonterminal))
return growingFirstSetTable.get(nonterminal);
growingFirstSetTable.put(nonterminal, new TerminalSet());
// we test for each rule defined for this nonterminal
for (RuleElementSequenz rightRuleSide : get(nonterminal)) {
ProductionRule currentProcessedRule = new ProductionRule(nonterminal, rightRuleSide);
firstSet = Sets.unionCollections(firstSet, getFirstSetOfRuleElementSequenz( rightRuleSide, growingFirstSetTable));
}
return firstSet;
}
private TerminalSet getFirstSetOfRuleElementSequenz(RuleElementSequenz ruleElementSequenz, HashMap<Nonterminal, TerminalSet> growingFirstSetTable) {
TerminalSet firstSet = new TerminalSet();
for (RuleElement rightRuleSideElement : ruleElementSequenz) {
if ( firstSet.contains(new EmptyString()))
firstSet.remove( new EmptyString());
TerminalSet firstSetOfCurrentElement = getFirstSetOfRuleElement(rightRuleSideElement, growingFirstSetTable);
// update table
if (rightRuleSideElement instanceof Nonterminal) {
growingFirstSetTable.put((Nonterminal) rightRuleSideElement, firstSetOfCurrentElement);
}
firstSet = Sets.unionCollections(firstSet, firstSetOfCurrentElement);
if (firstSetOfCurrentElement.size() > 0
&& !firstSetOfCurrentElement.contains(new EmptyString()))
break;
}
return firstSet;
}
public TerminalSet getFirstSetOfRuleElementSequenz(RuleElementSequenz ruleElementSequenz) {
HashMap<Nonterminal, TerminalSet> growingFirstSetTable = new HashMap<Nonterminal, TerminalSet>();
return getFirstSetOfRuleElementSequenz( ruleElementSequenz, growingFirstSetTable);
}
private static <K, V> Map<K, ? extends Set<V>> removeEmptySetsFromMap(Map<K, ? extends Set<V>> map) {
Set<K> keySet = new HashSet<K>(map.keySet());
for (K key : keySet) {
Set<V> currentSet = map.get(key);
if (currentSet != null)
if (currentSet.isEmpty())
map.remove(key);
}
return map;
}
public HashMap<Nonterminal, TerminalSet> getFirstSets() {
HashMap<Nonterminal, TerminalSet> result = new HashMap<Nonterminal, TerminalSet>();
// determine firstset over all productions of this grammar
for (Nonterminal nonterminal : this.keySet()) {
TerminalSet currentFirstSet = getFirstSetOfRuleElement(nonterminal, result);
result.put(nonterminal, currentFirstSet);
removeEmptySetsFromMap(result);
}
return result;
}
public HashMap<Nonterminal, TerminalSet> getFollowSets() {
boolean nothingAddedAnymore;
HashMap<Nonterminal, TerminalSet> result = new HashMap<Nonterminal, TerminalSet>();
// add $ in followset of start symbol
TerminalSet followSetOfStartSymbol = new TerminalSet();
followSetOfStartSymbol.add(new Terminator());
result.put(this.getStartSymbol(), followSetOfStartSymbol);
do {
nothingAddedAnymore = true;
for (Nonterminal nonterminal : this.keySet()) {
TerminalSet followSetOfCurrentNonterminal = getFollowSetOfRuleElement(nonterminal, result);
TerminalSet presentFollowSetOfCurrentNonterminal = result.get(nonterminal);
boolean elementsAdded = !(Test.isAssigned(presentFollowSetOfCurrentNonterminal)
&& presentFollowSetOfCurrentNonterminal.containsAll(followSetOfCurrentNonterminal));
if (elementsAdded) {
followSetOfCurrentNonterminal = Sets.unionCollections(followSetOfCurrentNonterminal, presentFollowSetOfCurrentNonterminal);
result.put(nonterminal, followSetOfCurrentNonterminal);
}
nothingAddedAnymore &= !elementsAdded;
}
} while (!nothingAddedAnymore);
return result;
}
private TerminalSet getFollowSetOfRuleElement(Nonterminal thisNonterminal, HashMap<Nonterminal, TerminalSet> growingFollowSetTable) {
TerminalSet result = new TerminalSet();
EmptyString emptyString = new EmptyString();
boolean isLastElementInSequenz = true;
// lookup in each rule for occurences of the given thisNonterminal
for (Nonterminal nonterminal : this.keySet()) {
// lookup in each rule for nonterminal for occurences of the given thisNonterminal
for (RuleElementSequenz ruleElementSequenz : this.get(nonterminal)) {
boolean startAccumulateFirstSet = false;
// therefore scan each element of the right rule side
for (int i = 0; i < ruleElementSequenz.size(); i++) {
isLastElementInSequenz = false;
// we have to decide two cases
// 1. A -> aBb => (FIRST(b) / {\epsilon}) \in FOLLOW(B)
// 2. A -> aB or A -> aBb mit b=\epsilon => FOLLOW(A) \subset FOLLOW(B)
// In both cases, we move forward until we read B or reaches the end of rule
RuleElement ruleElement = ruleElementSequenz.get(i);
if ( !startAccumulateFirstSet) {
startAccumulateFirstSet |= ruleElement.equals(thisNonterminal);
isLastElementInSequenz = true; // forehanded set to true
continue;
}
// we determine and accumulate the firstsets of the following elements until
// we read no more \epsilon
TerminalSet currentFirstSet = getFirstSetOfRuleElement(ruleElement, null);
if (currentFirstSet.contains(emptyString)) {
// we've read an \epsilon
if (ruleElementSequenz.size() > i + 1) {
// but there are more candidates to deliver a further \epsilon, so we remove it
currentFirstSet.remove(emptyString);
} else {
// otherwise we have case 2. So we add FOLLOW(A)
TerminalSet leftSideFollowset = growingFollowSetTable.get(thisNonterminal);
if ( Test.isAssigned( leftSideFollowset))
currentFirstSet.addAll(leftSideFollowset);
// and ensure there is no \epsilon in the follow set
currentFirstSet.remove(emptyString);
}
result = Sets.unionCollections(result, currentFirstSet);
} else {
// else there is no \epsilon in b so we simply add the set to result
result = Sets.unionCollections(result, currentFirstSet);
// and interrupt the accumulation of the firstsets. But continue with scanning.
// There could be another occurence of thisNonerminal
startAccumulateFirstSet = ruleElement.equals(thisNonterminal);
if ( startAccumulateFirstSet)
isLastElementInSequenz = true; // forehanded set to true
continue;
}
}
// add Follow(A) to Follow(B) if A -> aB
if (isLastElementInSequenz
&& startAccumulateFirstSet) {
result = Sets.unionCollections(result, growingFollowSetTable.get(nonterminal));
}
}
}
return result;
}
public TerminalSet getTerminals() {
return terminals;
}
public Set<Nonterminal> getNonterminals() {
return nonterminals;
}
}