/**
 * Copyright 2004-2016 Riccardo Solmi. All rights reserved.
 * This file is part of the Whole Platform.
 *
 * The Whole Platform is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * The Whole Platform is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the Whole Platform. If not, see <http://www.gnu.org/licenses/>.
 */
package org.whole.lang.grammars.parsers;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;

import org.whole.lang.builders.IBuilder;
import org.whole.lang.builders.IBuilderOperation;
import org.whole.lang.grammars.factories.GrammarsEntityFactory;
import org.whole.lang.grammars.model.As;
import org.whole.lang.grammars.model.Concatenate;
import org.whole.lang.grammars.model.Grammar;
import org.whole.lang.grammars.model.NonTerminal;
import org.whole.lang.grammars.model.Optional;
import org.whole.lang.grammars.model.Production;
import org.whole.lang.grammars.model.Repeat;
import org.whole.lang.grammars.model.Rule;
import org.whole.lang.grammars.reflect.GrammarsEntityDescriptorEnum;
import org.whole.lang.grammars.reflect.GrammarsFeatureDescriptorEnum;
import org.whole.lang.grammars.reflect.GrammarsLanguageKit;
import org.whole.lang.grammars.util.GrammarsUtils;
import org.whole.lang.iterators.IEntityIterator;
import org.whole.lang.iterators.IteratorFactory;
import org.whole.lang.model.IEntity;
import org.whole.lang.operations.NormalizerOperation;
import org.whole.lang.parsers.Lexer;
import org.whole.lang.reflect.EntityDescriptor;
import org.whole.lang.reflect.EntityDescriptorEnum;
import org.whole.lang.reflect.ILanguageKit;
import org.whole.lang.reflect.ReflectionFactory;
import org.whole.lang.templates.AbstractTemplateFactory;
import org.whole.lang.util.EntityUtils;

/**
 * @author Riccardo Solmi
 */
public class ParserTemplateFactory<E extends IEntity> extends AbstractTemplateFactory<E> {
	private Grammar grammar;
	private NonTerminal startSymbol;
	private List<ActivationState> currentStates = new ArrayList<ActivationState>();
	private Lexer lexer;
	private Lexer.Memento lexerMemento;
	private int lookaheadLevel;
	private ILanguageKit languageKit;
	private EntityDescriptorEnum edEnum;
	private EntityDescriptor<? extends IEntity> ed;
	private IBuilderOperation operation;
	private IBuilder builder;
	public ParserTemplateFactory(Lexer lexer, Grammar grammar) {
		this(lexer, grammar, grammar.getStartSymbol());
	}
	public ParserTemplateFactory(Lexer lexer, Grammar grammar, String startSymbol) {
		this(lexer, grammar, GrammarsEntityFactory.instance.createNonTerminal(startSymbol));
	}
	public ParserTemplateFactory(Lexer lexer, Grammar grammar, NonTerminal startSymbol) {
		this.lexer = lexer;
		this.grammar = normalize(grammar);
		this.startSymbol = startSymbol;
		this.languageKit = ReflectionFactory.getLanguageKit(GrammarsUtils.getLanguageURI(grammar), false, null);
		this.edEnum = languageKit.getEntityDescriptorEnum();
	}

	public void apply(IBuilderOperation operation) {
		this.operation = operation;
		builder = operation.wGetBuilder(GrammarsLanguageKit.URI);//FIXME workaround to get a generic builder

		currentStates.add(new ActivationState(null, createActiveRule(startSymbol)));

		//TODO && currentStates contains a terminal state
		while (!lexer.hitEnd()) {
			propagateActivation();//TODO return terminalSet
			completeProductions(filterActivation(matchTerminals()));
		}
	}

	public void enterLookaheadMode() {
		if (++lookaheadLevel == 1) {
			lexerMemento = lexer.mark();
			disableActions();
		}
	}
	protected void disableActions() {
	}
	public void exitLookaheadMode() {
		if (--lookaheadLevel == 0) {
			lexer.reset(lexerMemento);
			enableActions();
		}
	}
	protected void enableActions() {
	}

	/*
	 * For every ActiveRule in currentStates starting with a NonTerminal,
	 * add its production to the current state.
	 * For each current state, calculate the set of productions added
	 * and mark rules performing a recursive call with the recursive flag.
	 */
	public Set<ActiveRule> propagateActivation() {
		Set<ActiveRule> terminalSet = new HashSet<ActiveRule>();
		for (ActivationState as : currentStates) {
			Set<NonTerminal> recursiveSet = new HashSet<NonTerminal>();

			ListIterator<ActiveRule> activeRules = as.getActiveRules().listIterator();
			while (activeRules.hasNext()) {
				propagateActivation(terminalSet, recursiveSet, activeRules, activeRules.next());
			}
		}
		return terminalSet;
	}
	protected void propagateActivation(Set<ActiveRule> terminalSet, Set<NonTerminal> recursiveSet, ListIterator<ActiveRule> activeRules, ActiveRule ar) {
		Rule dotRule = ar.getDotRule();
		switch (dotRule.wGetEntityOrd()) {
		case GrammarsEntityDescriptorEnum.DataTerminal_ord:
		case GrammarsEntityDescriptorEnum.LiteralTerminal_ord:
			terminalSet.add(ar);
			break;
		case GrammarsEntityDescriptorEnum.NonTerminal_ord:
			NonTerminal nt = (NonTerminal) dotRule;
			if (recursiveSet.add(nt)) {
				activeRules.add(ar = createActiveRule(nt));
				propagateActivation(terminalSet, recursiveSet, activeRules, ar);
			} else
				ar.setRecursive(true);
			break;
		case GrammarsEntityDescriptorEnum.Concatenate_ord:
			Concatenate c = (Concatenate) dotRule;
			if (c.wSize() > 0) {
				ar.setDotRule((Rule) c.wGet(0));
				propagateActivation(terminalSet, recursiveSet, activeRules, ar);
			}
			break;
		case GrammarsEntityDescriptorEnum.Optional_ord:
			Optional o = (Optional) dotRule;
			ar.setDotRule(o.getRule());
			propagateActivation(terminalSet, recursiveSet, activeRules, ar);
			//TODO add also the rule after the optional
			break;
		case GrammarsEntityDescriptorEnum.Repeat_ord:
			Repeat r = (Repeat) dotRule;
			//TODO check lower bound and split active state for repeat continuation if 0
			ar.setDotRule(r.getRule());
			propagateActivation(terminalSet, recursiveSet, activeRules, ar);
			break;
		case GrammarsEntityDescriptorEnum.As_ord:
			As as = (As) dotRule;
			ar.setDotRule(as.getRule());
			propagateActivation(terminalSet, recursiveSet, activeRules, ar);
			break;
		}
	}
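	/*
	 * Illustration of the activation step above (not part of the original source;
	 * the grammar is hypothetical): given the productions
	 *   Sum  : Term "+" Term ;
	 *   Term : <number> ;
	 * activating the NonTerminal Sum yields the active rule [Sum : . Term "+" Term];
	 * propagation then follows the dot into Term, adds [Term : . <number>] and
	 * collects its terminal in the returned terminal set. If Term were reached a
	 * second time within the same state, the corresponding active rule would only
	 * be flagged as recursive instead of being activated again.
	 */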
	protected ActiveRule createActiveRule(NonTerminal nt) {
		Production p = getProduction(nt);

		ed = edEnum.valueOf(nt.getValue());//FIXME mapping
		if (!ed.getEntityKind().isData())
			builder.wEntity_(ed);

		return new ActiveRule(p.getName(), p.getRule());
	}
	protected Production getProduction(NonTerminal nt) {
		//TODO cache in a map
		IEntityIterator<Production> pi = IteratorFactory.<Production>childIterator();
		pi.reset(grammar.getPhraseStructure());
		for (Production p : pi)
			if (p.getName().wEquals(nt))
				return p;

		IEntityIterator<Production> li = IteratorFactory.<Production>childIterator();
		li.reset(grammar.getLexicalStructure());
		for (Production p : li)
			if (p.getName().wEquals(nt))
				return p;

		throw new IllegalArgumentException("Production not found: "+nt);
	}

	/*
	 * For every ActiveRule in currentStates starting with a Terminal,
	 * match it with the lexer and advance or remove it.
	 */
	public Set<ActiveRule> matchTerminals() {
		Set<ActiveRule> matchSet = new HashSet<ActiveRule>();
		java.util.regex.Pattern pattern = null;
		for (ActivationState as : currentStates) {
			ListIterator<ActiveRule> activeRules = as.getActiveRules().listIterator();
			while (activeRules.hasNext()) {
				ActiveRule ar = activeRules.next();
				Rule dotRule = ar.getDotRule();
				switch (dotRule.wGetEntityOrd()) {
				case GrammarsEntityDescriptorEnum.DataTerminal_ord:
				case GrammarsEntityDescriptorEnum.LiteralTerminal_ord:
					java.util.regex.Pattern p = (java.util.regex.Pattern) dotRule.wGet(GrammarsFeatureDescriptorEnum.pattern).wGetValue();
					lexer.withPattern(p);
					if (lexer.hasNext()) {
						if (pattern == null || p.pattern().equals(pattern.pattern())) {
							pattern = p;
							matchSet.add(ar);
							String token = lexer.next();
							if (dotRule.wGetEntityDescriptor().equals(GrammarsEntityDescriptorEnum.DataTerminal))
								builder.wEntity(ed, token);
						} else
							throw new IllegalStateException("Backtracking is required for processing multiple matching patterns: ..., "+pattern.pattern());
					} else
						activeRules.remove();
				}
			}
		}
		return matchSet;
	}

	/*
	 * For every ActiveRule in the matchSet, advance the dot past the matched
	 * terminal and collect the rules whose production is now complete.
	 */
	public Set<ActiveRule> filterActivation(Set<ActiveRule> matchSet) {
		if (matchSet.isEmpty())
			throw new IllegalStateException("ParserException");//TODO ParserException

		Set<ActiveRule> completeSet = new HashSet<ActiveRule>();
		//TODO filter rules not advanced from the matchSet
		for (ActiveRule ar : matchSet) {
			updateRule(completeSet, ar);
		}
		return completeSet;
	}
	protected void updateRule(Set<ActiveRule> completeSet, ActiveRule ar) {
		updateRule(completeSet, ar, ar.getDotRule());
	}
	protected void updateRule(Set<ActiveRule> completeSet, ActiveRule ar, Rule dotRule) {
		Rule dotRuleParent = (Rule) dotRule.wGetParent();
		switch (dotRuleParent.wGetEntityOrd()) {
		case GrammarsEntityDescriptorEnum.Concatenate_ord:
			int dotRuleIndex = dotRuleParent.wIndexOf(dotRule);
			if (dotRuleIndex+1 < dotRuleParent.wSize())
				ar.setDotRule((Rule) dotRuleParent.wGet(dotRuleIndex+1));
			else
				updateRule(completeSet, ar, dotRuleParent);
			break;
		case GrammarsEntityDescriptorEnum.Optional_ord:
		case GrammarsEntityDescriptorEnum.As_ord:
			updateRule(completeSet, ar, dotRuleParent);
			break;
		case GrammarsEntityDescriptorEnum.Production_ord:
			ar.setDotRule(dotRuleParent);
			completeSet.add(ar);
			break;
		//TODO repeat behavior
		}
	}
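	/*
	 * Illustration of updateRule above (not part of the original source; the
	 * production is hypothetical): for [Sum : Term "+" Term ;] with the dot on the
	 * first Term, matching it moves the dot to the "+" literal; after the last Term
	 * the dot climbs to the Production itself and the active rule is added to the
	 * complete set.
	 */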
	/*
	 * For every empty ActiveRule in currentStates
	 * remove it and advance rules waiting on the production NonTerminal
	 */
	public void completeProductions(Set<ActiveRule> completeSet) {
		if (completeSet.isEmpty())
			return;

		for (ActivationState as : currentStates) {
			ListIterator<ActiveRule> activeRules = as.getActiveRules().listIterator();
			while (activeRules.hasNext()) {
				ActiveRule ar = activeRules.next();
				if (completeSet.contains(ar)) {
					EntityDescriptor<? extends IEntity> ed = edEnum.valueOf(ar.getNt().getValue());//FIXME mapping
					if (!ed.getEntityKind().isData())
						builder._wEntity(ed);

					activeRules.remove();
				} else {
					Rule dotRule = ar.getDotRule();
					if (dotRule.wGetEntityDescriptor().equals(GrammarsEntityDescriptorEnum.NonTerminal))
						for (ActiveRule cr : completeSet)
							if (cr.getNt().wEquals(dotRule))
								updateRule(completeSet, ar, dotRule); //TODO use another completeSet
				}
			}
		}
	}

	public static Grammar normalize(Grammar grammar) {
		grammar = EntityUtils.clone(grammar);
		NormalizerOperation.normalize(grammar);
		GrammarsUtils.ensureCompiledPatterns(grammar);
		return grammar;
	}

	@Override
	public String toString() {
		return currentStates.toString();
	}
}
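/*
 * A minimal usage sketch (not part of the original source). The lexer and grammar
 * construction below are placeholders, and ModelBuilderOperation/wGetResult() are
 * assumed to come from org.whole.lang.builders:
 *
 *   Grammar grammar = ...;   // a Grammars model with phrase and lexical structure
 *   Lexer lexer = ...;       // a Lexer positioned on the input text
 *   ParserTemplateFactory<IEntity> factory =
 *       new ParserTemplateFactory<IEntity>(lexer, grammar);
 *   ModelBuilderOperation op = new ModelBuilderOperation();
 *   factory.apply(op);       // drives the builder while consuming the input
 *   IEntity model = op.wGetResult();
 */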