/** * Copyright 2002-2017 Evgeny Gryaznov * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.textmapper.lapg.eval; import java.io.IOException; import java.text.MessageFormat; import java.util.ArrayList; import java.util.List; import org.textmapper.lapg.api.Grammar; import org.textmapper.lapg.api.ParserData; import org.textmapper.lapg.eval.GenericLexer.ErrorReporter; import org.textmapper.lapg.eval.GenericLexer.Span; import org.textmapper.lapg.eval.GenericLexer.Tokens; import org.textmapper.lapg.eval.GenericParseContext.TextSource; public class GenericParser { public static class ParseException extends Exception { private static final long serialVersionUID = 1L; public ParseException() { } } private final ErrorReporter reporter; protected TextSource source; protected final Grammar grammar; private final int[] tmAction; private final int[] tmLalr; private final int[] lapg_sym_goto; private final int[] lapg_sym_from; private final int[] lapg_sym_to; private final int[] tmRuleLen; private final int[] tmRuleSymbol; private final boolean debugSyntax; public GenericParser(ErrorReporter reporter, ParserData tables, Grammar grammar, boolean debugSyntax) { this.reporter = reporter; this.grammar = grammar; this.tmAction = tables.getAction(); this.tmLalr = tables.getLalr(); this.lapg_sym_goto = tables.getSymGoto(); this.lapg_sym_to = tables.getSymTo(); this.lapg_sym_from = tables.getSymFrom(); this.tmRuleLen = tables.getRuleLength(); this.tmRuleSymbol = tables.getLeft(); this.debugSyntax = debugSyntax; } /** * -3-n Lookahead (state id) * -2 Error * -1 Shift * 0..n Reduce (rule index) */ protected final int tmAction(int state, int symbol) { int p; if (tmAction[state] < -2) { if (symbol == Tokens.Unavailable_) { return -3 - state; } for (p = -tmAction[state] - 3; tmLalr[p] >= 0; p += 2) { if (tmLalr[p] == symbol) { break; } } return tmLalr[p + 1]; } return tmAction[state]; } protected final int tmGoto(int state, int symbol) { int min = lapg_sym_goto[symbol], max = lapg_sym_goto[symbol + 1] - 1; int i, e; while (min <= max) { e = (min + max) >> 1; i = lapg_sym_from[e]; if (i == state) { return lapg_sym_to[e]; } else if (i < state) { min = e + 1; } else { max = e - 1; } } return -1; } protected int tmHead; protected Span[] tmStack; protected Span tmNext; protected GenericLexer tmLexer; public Object parse(GenericLexer lexer, int initialState, int finalState, boolean noEoi) throws IOException, ParseException { tmLexer = lexer; tmStack = new Span[1024]; tmHead = 0; int tmShiftsAfterError = 4; tmStack[0] = new Span(); tmStack[0].state = initialState; tmNext = tmLexer.next(); while (tmStack[tmHead].state != finalState) { int action = tmAction(tmStack[tmHead].state, tmNext == null ? Tokens.Unavailable_ : tmNext.symbol); if (action <= -3 && tmNext == null) { tmNext = tmLexer.next(); action = tmAction(tmStack[tmHead].state, tmNext.symbol); } if (action >= 0) { reduce(action); } else if (action == -1) { shift(noEoi); tmShiftsAfterError++; } if (action == -2 || tmStack[tmHead].state == -1) { if (grammar.getError() == null) { break; } if (restore()) { if (tmShiftsAfterError >= 4) { reporter.error(MessageFormat.format("syntax error before line {0}", tmLexer.getTokenLine()), tmNext.line, tmNext.offset, tmNext.endoffset); } if (tmShiftsAfterError <= 1) { tmNext = tmLexer.next(); } tmShiftsAfterError = 0; continue; } if (tmHead < 0) { tmHead = 0; tmStack[0] = new Span(); tmStack[0].state = initialState; } break; } } if (tmStack[tmHead].state != finalState) { if (tmShiftsAfterError >= 4) { reporter.error(MessageFormat.format("syntax error before line {0}", tmLexer.getTokenLine()), tmNext == null ? tmLexer.getLine() : tmNext.line, tmNext == null ? tmLexer.getOffset() : tmNext.offset, tmNext == null ? tmLexer.getOffset() : tmNext.endoffset); } throw new ParseException(); } return tmStack[noEoi ? tmHead : tmHead - 1].value; } protected boolean restore() throws IOException { if (tmNext == null) { tmNext = tmLexer.next(); } if (tmNext.symbol == 0) { return false; } while (tmHead >= 0 && tmGoto(tmStack[tmHead].state, grammar.getError().getIndex()) == -1) { dispose(tmStack[tmHead]); tmStack[tmHead] = null; tmHead--; } if (tmHead >= 0) { tmStack[++tmHead] = new Span(); tmStack[tmHead].symbol = grammar.getError().getIndex(); tmStack[tmHead].value = null; tmStack[tmHead].state = tmGoto(tmStack[tmHead - 1].state, grammar.getError().getIndex()); tmStack[tmHead].line = tmNext.line; tmStack[tmHead].offset = tmNext.offset; tmStack[tmHead].endoffset = tmNext.endoffset; return true; } return false; } protected void shift(boolean lazy) throws IOException { if (tmNext == null) { tmNext = tmLexer.next(); } tmStack[++tmHead] = tmNext; tmStack[tmHead].state = tmGoto(tmStack[tmHead - 1].state, tmNext.symbol); if (debugSyntax) { System.out.println(MessageFormat.format("shift: {0} ({1})", grammar.getSymbols()[tmNext.symbol].getNameText(), tmLexer.tokenText())); } if (tmStack[tmHead].state != -1 && tmNext.symbol != 0) { tmNext = lazy ? null : tmLexer.next(); } } protected void reduce(int rule) { Span left = new Span(); left.value = (tmRuleLen[rule] != 0) ? tmStack[tmHead + 1 - tmRuleLen[rule]].value : null; left.symbol = tmRuleSymbol[rule]; left.state = 0; if (debugSyntax) { System.out.println("reduce to " + grammar.getSymbols()[tmRuleSymbol[rule]].getNameText()); } Span startsym = (tmRuleLen[rule] != 0) ? tmStack[tmHead + 1 - tmRuleLen[rule]] : tmNext; left.line = startsym == null ? tmLexer.getLine() : startsym.line; left.offset = startsym == null ? tmLexer.getOffset() : startsym.offset; left.endoffset = (tmRuleLen[rule] != 0) ? tmStack[tmHead].endoffset : tmNext == null ? tmLexer.getOffset() : tmNext.offset; applyRule(left, rule, tmRuleLen[rule]); for (int e = tmRuleLen[rule]; e > 0; e--) { cleanup(tmStack[tmHead]); tmStack[tmHead--] = null; } tmStack[++tmHead] = left; tmStack[tmHead].state = tmGoto(tmStack[tmHead - 1].state, left.symbol); } protected void applyRule(Span tmLeft, int rule, int ruleLength) { if (ruleLength == 1) { Object right = tmStack[tmHead].value; if (right instanceof GenericNode) { tmLeft.value = right; } else { tmLeft.value = new GenericNode(source, tmLeft.offset, tmLeft.endoffset); } } else if (ruleLength > 1) { List<GenericNode> children = new ArrayList<>(ruleLength); for (int i = ruleLength - 1; i >= 0; i--) { if (tmStack[tmHead - i].value instanceof GenericNode) { children.add((GenericNode) tmStack[tmHead - i].value); } } tmLeft.value = new GenericNode(source, tmLeft.offset, tmLeft.endoffset, children.toArray(new GenericNode[children.size()])); } } /** * disposes symbol dropped by error recovery mechanism */ protected void dispose(Span value) { } /** * cleans node removed from the stack */ protected void cleanup(Span value) { } }