/**
* Copyright 2002-2017 Evgeny Gryaznov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.textmapper.xml;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import org.textmapper.xml.XmlLexer.ErrorReporter;
import org.textmapper.xml.XmlLexer.Span;
import org.textmapper.xml.XmlLexer.Tokens;
import org.textmapper.xml.XmlTree.TextSource;
public class XmlParser {
/**
 * Signals that parsing was abandoned because of a syntax error.
 * The exception carries no message of its own.
 */
public static class ParseException extends Exception {

    private static final long serialVersionUID = 1L;

    /** Creates an exception without a detail message or cause. */
    public ParseException() {
        super();
    }
}
// Receives the diagnostics emitted by checkTag and parse.
private final ErrorReporter reporter;
/**
 * Creates a parser that sends its error messages to {@code reporter}.
 *
 * @param reporter destination for error messages; stored as-is, not checked for null
 */
public XmlParser(ErrorReporter reporter) {
this.reporter = reporter;
}
// Compile-time switch: when true, shift and reduce print a trace to System.out.
private static final boolean DEBUG_SYNTAX = false;
// Text being parsed; getData reads its contents to build XmlData slices.
// NOTE(review): nothing in this class assigns the field, so it presumably has to be
// set by code in the same package before parse is called - confirm against callers.
TextSource source;
/**
 * Wraps the slice of the source contents between two offsets.
 *
 * @param start offset of the first character of the slice
 * @param end offset just past the slice; the length passed on is {@code end - start}
 * @return a new XmlData over the source contents
 */
private XmlData getData(int start, int end) {
    final int length = end - start;
    return new XmlData(source.getContents(), start, length);
}
/**
 * Reports an error when a closing tag does not carry the name of the node it closes.
 *
 * @param node the element opened by the matching start tag
 * @param endTag name found in the closing tag
 * @param offset start offset handed to the reporter
 * @param endoffset end offset handed to the reporter
 * @param line line number handed to the reporter
 */
private void checkTag(XmlNode node, String endTag, int offset, int endoffset, int line) {
    if (node.getTagName().equals(endTag)) {
        // Names agree, nothing to report.
        return;
    }
    reporter.error("Tag " + node.getTagName() + " is closed with " + endTag, line, offset, endoffset);
}
// Generated parse tables. They are stored as packed strings and decoded by
// XmlLexer.unpack_int; the first argument is presumably the decoded element count
// (unpack_int is not visible here - confirm). Do not edit by hand.

// Indexed by parser state. Values at or above -2 are returned directly by
// tmAction(int, int); values below -2 encode a start position in tmLalr (-value - 3).
private static final int[] tmAction = XmlLexer.unpack_int(31,
"\uffff\uffff\6\0\uffff\uffff\ufffd\uffff\2\0\uffff\uffff\5\0\ufff5\uffff\uffeb\uffff" +
"\1\0\uffff\uffff\uffff\uffff\3\0\uffff\uffff\uffff\uffff\uffe3\uffff\15\0\uffff\uffff" +
"\uffff\uffff\4\0\10\0\uffff\uffff\14\0\11\0\uffff\uffff\uffff\uffff\16\0\12\0\13" +
"\0\uffff\uffff\ufffe\uffff");
// Flat list of (symbol, action) pairs scanned by tmAction(int, int). A run ends at a
// pair whose symbol is negative; the action of that pair is used when nothing matched.
private static final int[] tmLalr = XmlLexer.unpack_int(34,
"\1\0\uffff\uffff\2\0\uffff\uffff\0\0\0\0\uffff\uffff\ufffe\uffff\10\0\uffff\uffff" +
"\4\0\7\0\6\0\7\0\11\0\7\0\uffff\uffff\ufffe\uffff\4\0\uffff\uffff\6\0\20\0\11\0\20" +
"\0\uffff\uffff\ufffe\uffff\4\0\uffff\uffff\6\0\17\0\11\0\17\0\uffff\uffff\ufffe\uffff");
// Indexed by symbol: entries [symbol] and [symbol + 1] delimit the slice of
// lapg_sym_from / lapg_sym_to that tmGoto searches for that symbol.
private static final int[] lapg_sym_goto = XmlLexer.unpack_int(22,
"\0\0\1\0\5\0\11\0\11\0\17\0\20\0\23\0\24\0\25\0\27\0\27\0\30\0\32\0\36\0\41\0\45" +
"\0\51\0\53\0\54\0\56\0\57\0");
// Source states of the transitions. tmGoto binary-searches each slice, so every
// slice has to be in ascending order.
private static final int[] lapg_sym_from = XmlLexer.unpack_int(47,
"\35\0\0\0\3\0\5\0\13\0\0\0\3\0\5\0\13\0\2\0\10\0\12\0\15\0\17\0\22\0\25\0\21\0\30" +
"\0\31\0\16\0\7\0\12\0\21\0\0\0\0\0\5\0\0\0\3\0\5\0\13\0\2\0\12\0\22\0\0\0\3\0\5\0" +
"\13\0\0\0\3\0\5\0\13\0\5\0\13\0\10\0\10\0\17\0\10\0");
// Target states, parallel to lapg_sym_from.
private static final int[] lapg_sym_to = XmlLexer.unpack_int(47,
"\36\0\1\0\1\0\1\0\1\0\2\0\2\0\12\0\12\0\7\0\16\0\7\0\24\0\16\0\7\0\32\0\27\0\33\0" +
"\34\0\25\0\15\0\22\0\30\0\35\0\3\0\13\0\4\0\11\0\4\0\11\0\10\0\10\0\31\0\5\0\5\0" +
"\5\0\5\0\6\0\6\0\6\0\6\0\14\0\23\0\17\0\20\0\26\0\21\0");
// Indexed by rule: number of stack entries that reduce pops for the rule.
private static final int[] tmRuleLen = XmlLexer.unpack_int(17,
"\1\0\2\0\1\0\2\0\3\0\1\0\1\0\1\0\3\0\4\0\5\0\4\0\2\0\1\0\3\0\1\0\0\0");
// Indexed by rule: symbol that reduce pushes in place of the popped entries.
private static final int[] tmRuleSymbol = XmlLexer.unpack_int(17,
"\13\0\14\0\14\0\15\0\15\0\15\0\15\0\16\0\16\0\17\0\20\0\21\0\22\0\22\0\23\0\24\0" +
"\24\0");
/**
 * Printable names of the grammar symbols, indexed by symbol number.
 * Only the debug trace in shift and reduce reads this table.
 */
protected static final String[] tmSymbolNames = {
    // symbols 0..10
    "eoi", "any", "'<'", "_skipcomment", "identifier", "ccon",
    "'>'", "'='", "':'", "'/'", "_skip",
    // symbols 11..20, the values declared in Nonterminals
    "input", "xml_tags", "xml_tag_or_space", "tag_name", "tag_start",
    "no_body_tag", "tag_end", "attributes", "attribute", "attributesopt",
};
/**
 * Symbol numbers of the grammar's non-terminals. Each value is the index of the
 * symbol's name in {@code tmSymbolNames}; the numbering continues after the
 * constants inherited from {@code Tokens}.
 */
public interface Nonterminals extends Tokens {
// non-terminals
int input = 11;
int xml_tags = 12;
int xml_tag_or_space = 13;
int tag_name = 14;
int tag_start = 15;
int no_body_tag = 16;
int tag_end = 17;
int attributes = 18;
int attribute = 19;
int attributesopt = 20;
}
/**
 * Determines the parser action for {@code state} with {@code symbol} as lookahead.
 * Entries of the action table are encoded as:
 * -3-n Lookahead (state id)
 * -2 Error
 * -1 Shift
 * 0..n Reduce (rule index)
 * A lookahead entry is resolved by scanning the (symbol, action) pairs in tmLalr.
 */
protected static int tmAction(int state, int symbol) {
    final int entry = tmAction[state];
    if (entry >= -2) {
        // The state has a single action that does not depend on the lookahead.
        return entry;
    }
    int pos = -entry - 3;
    // Stop at the pair for this symbol, or at the negative terminator pair.
    while (tmLalr[pos] >= 0 && tmLalr[pos] != symbol) {
        pos += 2;
    }
    return tmLalr[pos + 1];
}
/**
 * Finds the state entered from {@code state} on {@code symbol}.
 * Binary-searches the slice of lapg_sym_from that lapg_sym_goto assigns to the symbol.
 *
 * @return the matching entry of lapg_sym_to, or -1 when there is no transition
 */
protected static int tmGoto(int state, int symbol) {
    int lo = lapg_sym_goto[symbol];
    int hi = lapg_sym_goto[symbol + 1] - 1;
    while (lo <= hi) {
        final int mid = (lo + hi) >> 1;
        final int from = lapg_sym_from[mid];
        if (from < state) {
            lo = mid + 1;
        } else if (from > state) {
            hi = mid - 1;
        } else {
            return lapg_sym_to[mid];
        }
    }
    return -1;
}
// Index of the top entry of tmStack.
protected int tmHead;
// Parse stack; parse allocates it and puts the initial state in slot 0.
protected Span[] tmStack;
// Current lookahead token, refilled from tmLexer by shift.
protected Span tmNext;
// Lexer supplying the tokens of the current parse call.
protected XmlLexer tmLexer;
/**
 * Runs the table-driven parser over the tokens delivered by {@code lexer}.
 * The loop repeatedly looks up the action for the top state and the lookahead,
 * then shifts or reduces until the accepting state (30) is reached or an error
 * is detected.
 *
 * @param lexer token source; also used for the line number in the error message
 * @return the value of the stack entry below the top once state 30 is reached;
 *         rule 0 builds it as the {@code <root>} node
 * @throws IOException declared by the lexer calls that fetch tokens
 * @throws ParseException when the input cannot be parsed; the error has already
 *         been sent to the reporter at that point
 */
public XmlNode parse(XmlLexer lexer) throws IOException, ParseException {
    tmLexer = lexer;
    tmStack = new Span[1024];
    tmHead = 0;

    // Slot 0 holds the initial state.
    tmStack[0] = new Span();
    tmStack[0].state = 0;
    tmNext = tmLexer.next();

    while (tmStack[tmHead].state != 30) {
        int action = tmAction(tmStack[tmHead].state, tmNext.symbol);
        if (action >= 0) {
            reduce(action);
        } else if (action == -1) {
            shift();
        }
        // -2 is the error action; state -1 means the goto lookup found no transition.
        if (action == -2 || tmStack[tmHead].state == -1) {
            break;
        }
    }

    if (tmStack[tmHead].state != 30) {
        // Concatenate instead of using MessageFormat: a numeric MessageFormat argument
        // is rendered with locale-specific grouping, which turns line 1234 into "1,234".
        reporter.error("syntax error before line " + tmLexer.getTokenLine(),
                tmNext.line, tmNext.offset, tmNext.endoffset);
        throw new ParseException();
    }
    return (XmlNode) tmStack[tmHead - 1].value;
}
/**
 * Pushes the lookahead token onto the stack, computes the state it leads to and,
 * unless the transition failed (state -1) or the token is symbol 0, fetches the
 * next token from the lexer.
 *
 * @throws IOException declared by the lexer call that fetches the next token
 */
protected void shift() throws IOException {
    ensureStackCapacity();
    tmStack[++tmHead] = tmNext;
    tmStack[tmHead].state = tmGoto(tmStack[tmHead - 1].state, tmNext.symbol);
    if (DEBUG_SYNTAX) {
        System.out.println(MessageFormat.format("shift: {0} ({1})", tmSymbolNames[tmNext.symbol], tmLexer.tokenText()));
    }
    if (tmStack[tmHead].state != -1 && tmNext.symbol != 0) {
        tmNext = tmLexer.next();
    }
}

/**
 * Replaces the top {@code tmRuleLen[rule]} stack entries with a single entry for the
 * rule's left-hand symbol, after running the rule's semantic action.
 *
 * @param rule index of the rule to reduce by
 */
protected void reduce(int rule) {
    final int ruleLen = tmRuleLen[rule];
    Span left = new Span();
    // Default value: the value of the first right-hand symbol, or null for an empty rule.
    left.value = (ruleLen != 0) ? tmStack[tmHead + 1 - ruleLen].value : null;
    left.symbol = tmRuleSymbol[rule];
    left.state = 0;
    if (DEBUG_SYNTAX) {
        System.out.println("reduce to " + tmSymbolNames[tmRuleSymbol[rule]]);
    }
    // An empty rule takes its position from the lookahead token.
    Span startsym = (ruleLen != 0) ? tmStack[tmHead + 1 - ruleLen] : tmNext;
    left.line = startsym.line;
    left.offset = startsym.offset;
    left.endoffset = (ruleLen != 0) ? tmStack[tmHead].endoffset : tmNext.offset;
    applyRule(left, rule, ruleLen);
    for (int e = ruleLen; e > 0; e--) {
        tmStack[tmHead--] = null;
    }
    // An empty rule pops nothing, so the push below can need a new slot.
    ensureStackCapacity();
    tmStack[++tmHead] = left;
    tmStack[tmHead].state = tmGoto(tmStack[tmHead - 1].state, left.symbol);
}

/**
 * Guarantees a free slot above tmHead, doubling the stack when it is full, so that
 * deeply nested input no longer ends in an ArrayIndexOutOfBoundsException.
 */
private void ensureStackCapacity() {
    if (tmHead + 1 >= tmStack.length) {
        Span[] grown = new Span[tmStack.length * 2];
        System.arraycopy(tmStack, 0, grown, 0, tmStack.length);
        tmStack = grown;
    }
}
/**
 * Runs the semantic action of rule {@code ruleIndex}. The right-hand side symbols
 * are still on the stack, with the last one at {@code tmStack[tmHead]}. Rules
 * without a case here keep the default value that reduce stored in {@code tmLeft}.
 *
 * @param tmLeft entry being built for the rule's left-hand symbol
 * @param ruleIndex index of the rule being reduced
 * @param ruleLength number of right-hand side symbols (not used by the actions)
 */
@SuppressWarnings("unchecked")
protected void applyRule(Span tmLeft, int ruleIndex, int ruleLength) {
    switch (ruleIndex) {
        case 0: { // input : xml_tags
            tmLeft.value = new XmlNode("<root>", null, 1);
            XmlNode root = (XmlNode) tmLeft.value;
            root.setData((List<XmlElement>) tmStack[tmHead].value);
            break;
        }
        case 1: { // xml_tags : xml_tags xml_tag_or_space
            List<XmlElement> elements = (List<XmlElement>) tmStack[tmHead - 1].value;
            XmlElement next = (XmlElement) tmStack[tmHead].value;
            elements.add(next);
            break;
        }
        case 2: { // xml_tags : xml_tag_or_space
            List<XmlElement> elements = new ArrayList<XmlElement>();
            tmLeft.value = elements;
            elements.add((XmlElement) tmStack[tmHead].value);
            break;
        }
        case 3: { // xml_tag_or_space : tag_start tag_end
            XmlNode opened = (XmlNode) tmStack[tmHead - 1].value;
            Span closing = tmStack[tmHead];
            checkTag(opened, (String) closing.value, closing.offset, closing.endoffset, closing.line);
            break;
        }
        case 4: { // xml_tag_or_space : tag_start xml_tags tag_end
            XmlNode opened = (XmlNode) tmStack[tmHead - 2].value;
            Span closing = tmStack[tmHead];
            checkTag(opened, (String) closing.value, closing.offset, closing.endoffset, closing.line);
            opened.setData((List<XmlElement>) tmStack[tmHead - 1].value);
            break;
        }
        case 6: { // xml_tag_or_space : any
            tmLeft.value = getData(tmLeft.offset, tmLeft.endoffset);
            break;
        }
        case 7: { // tag_name : identifier
            tmLeft.value = (String) tmStack[tmHead].value;
            break;
        }
        case 8: { // tag_name : identifier ':' identifier
            String prefix = (String) tmStack[tmHead - 2].value;
            String localName = (String) tmStack[tmHead].value;
            tmLeft.value = prefix + ":" + localName;
            break;
        }
        case 9: { // tag_start : '<' tag_name attributesopt '>'
            String tagName = (String) tmStack[tmHead - 2].value;
            List<XmlAttribute> attrs = (List<XmlAttribute>) tmStack[tmHead - 1].value;
            tmLeft.value = new XmlNode(tagName, attrs, tmStack[tmHead - 3].line);
            break;
        }
        case 10: { // no_body_tag : '<' tag_name attributesopt '/' '>'
            String tagName = (String) tmStack[tmHead - 3].value;
            List<XmlAttribute> attrs = (List<XmlAttribute>) tmStack[tmHead - 2].value;
            tmLeft.value = new XmlNode(tagName, attrs, tmStack[tmHead - 4].line);
            break;
        }
        case 11: { // tag_end : '<' '/' tag_name '>'
            tmLeft.value = (String) tmStack[tmHead - 1].value;
            break;
        }
        case 12: { // attributes : attributes attribute
            List<XmlAttribute> attrs = (List<XmlAttribute>) tmStack[tmHead - 1].value;
            XmlAttribute next = (XmlAttribute) tmStack[tmHead].value;
            attrs.add(next);
            break;
        }
        case 13: { // attributes : attribute
            List<XmlAttribute> attrs = new ArrayList<XmlAttribute>();
            tmLeft.value = attrs;
            attrs.add((XmlAttribute) tmStack[tmHead].value);
            break;
        }
        case 14: { // attribute : identifier '=' ccon
            String attrName = (String) tmStack[tmHead - 2].value;
            String attrValue = (String) tmStack[tmHead].value;
            tmLeft.value = new XmlAttribute(attrName, attrValue);
            break;
        }
    }
}
}