/**
* Copyright 2002-2017 Evgeny Gryaznov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.textmapper.tool.parser.action;
import java.io.IOException;
import java.io.Reader;
import java.text.MessageFormat;
/**
 * Table-driven lexer that splits its input into brace tokens and skipped
 * stretches of text.
 *
 * <p>According to the rule table below, the lexer recognizes an opening brace,
 * a closing brace, and end of input. Single-quoted strings, double-quoted
 * strings and any run of characters that contains neither quotes nor braces
 * are matched but never returned to the caller (see {@link #createToken}).
 *
 * <p>Characters are supplied by the subclass through {@link #nextChar()}.
 * Note that the constructor calls {@link #reset()}, which in turn calls
 * {@code nextChar()}; a subclass implementation therefore runs before the
 * subclass's own field initializers and constructor body have executed.
 */
public abstract class SActionLexer {

    /**
     * A token produced by {@link SActionLexer#next()}.
     */
    public static class Span {
        /** Semantic value of the token; {@code next()} always sets it to {@code null}. */
        public Object value;
        /** Token kind, one of the constants in {@link Tokens}. */
        public int symbol;
        /** Not assigned by {@code next()} in this class. */
        public int state;
        /** Value of the lexer's line counter when matching of this token started. */
        public int line;
        /** Value of the lexer's offset (see {@link SActionLexer#getOffset()}) for this token. */
        public int offset;
    }

    /**
     * Token kinds stored in {@link Span#symbol}.
     */
    public interface Tokens {
        int Unavailable_ = -1;
        int eoi = 0;
        int Lbrace = 1;
        int _skip = 2;
        int Rbrace = 3;
    }

    /**
     * Receiver of lexical error messages.
     */
    public interface ErrorReporter {
        /**
         * Reports a lexical problem.
         *
         * @param message human-readable description
         * @param line    line recorded for the offending token
         * @param offset  offset recorded for the offending token
         */
        void error(String message, int line, int offset);
    }

    /** Initial capacity of the token buffer; the buffer is shrunk back to it after oversized tokens. */
    public static final int TOKEN_SIZE = 2048;

    private final ErrorReporter reporter;

    /** Current lookahead character, or -1 once the input is exhausted. */
    private int chr;

    /** Start state of the automaton used for the next token. */
    private int state;

    /** Text of the token that is currently being (or was last) matched. */
    private final StringBuilder tokenBuffer = new StringBuilder(TOKEN_SIZE);

    private int tokenLine;
    private int currLine;

    /**
     * Offset copied into every token. This class only resets it to 0 in
     * {@link #reset()} and otherwise changes it through {@link #setOffset(int)}.
     */
    private int currOffset;

    /**
     * Creates a lexer and reads the first lookahead character.
     *
     * @param reporter sink for lexical errors
     * @throws IOException if {@link #nextChar()} fails while priming the lookahead
     */
    public SActionLexer(ErrorReporter reporter) throws IOException {
        this.reporter = reporter;
        reset();
    }

    /**
     * Returns the lexer to its initial state (state 0, line 1, offset 0) and
     * fetches a fresh lookahead character from {@link #nextChar()}.
     *
     * @throws IOException if {@link #nextChar()} fails
     */
    public void reset() throws IOException {
        this.state = 0;
        tokenLine = currLine = 1;
        currOffset = 0;
        chr = nextChar();
    }

    /**
     * Supplies the next input character.
     *
     * @return the next character (the lexer accepts supplementary code points
     *         as well), or -1 at end of input
     * @throws IOException if the underlying source fails
     */
    protected abstract int nextChar() throws IOException;

    /**
     * Appends the lookahead character to the current token text and reads the
     * next one. Does nothing once the end of input has been reached.
     *
     * @throws IOException if {@link #nextChar()} fails
     */
    protected void advance() throws IOException {
        if (chr == -1) {
            return;
        }
        consumeChar();
    }

    /**
     * Moves the lookahead into the token buffer, bumps the line counter on a
     * newline and fetches the next lookahead. The caller must ensure that the
     * lookahead is not the end-of-input marker.
     */
    private void consumeChar() throws IOException {
        if (chr == '\n') {
            currLine++;
        }
        if (chr >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            // Supplementary code points need a surrogate pair in the buffer.
            tokenBuffer.append(Character.toChars(chr));
        } else {
            tokenBuffer.append((char) chr);
        }
        chr = nextChar();
    }

    /** @return the automaton start state used for the next token */
    public int getState() {
        return state;
    }

    /** @param state the automaton start state to use for the next token */
    public void setState(int state) {
        this.state = state;
    }

    /** @return the line on which matching of the most recent token started */
    public int getTokenLine() {
        return tokenLine;
    }

    /** @return the current line counter */
    public int getLine() {
        return currLine;
    }

    /** @param currLine new value for the line counter */
    public void setLine(int currLine) {
        this.currLine = currLine;
    }

    /** @return the offset that will be recorded in subsequent tokens */
    public int getOffset() {
        return currOffset;
    }

    /** @param currOffset the offset to record in subsequent tokens */
    public void setOffset(int currOffset) {
        this.currOffset = currOffset;
    }

    /** @return the text of the token that is being, or was last, matched */
    public String tokenText() {
        return tokenBuffer.toString();
    }

    /** @return the length, in chars, of {@link #tokenText()} */
    public int tokenSize() {
        return tokenBuffer.length();
    }

    /** Character class for every character below {@code tmCharClass.length}. */
    private static final short tmCharClass[] = {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 6, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 7
    };

    /** Token kind produced by each rule; entry 0 (-1) marks an invalid token. */
    private static final int[] tmRuleSymbol = unpack_int(7,
        "\uffff\uffff\0\0\1\0\2\0\2\0\2\0\3\0");

    private static final int tmClassesCount = 8;

    /**
     * Transition table indexed by {@code state * tmClassesCount + charClass}.
     * Non-negative entries are target states; a negative entry {@code v}
     * terminates matching with rule index {@code -1 - v}.
     */
    private static final short[] tmGoto = unpack_vc_short(80,
        "\1\ufffe\1\11\1\10\1\5\2\11\1\2\1\1\10\ufff9\1\uffff\3\2\1\4\1\uffff\1\3\1\2\10\ufffb" +
        "\1\uffff\4\2\1\uffff\2\2\1\uffff\2\5\1\7\1\6\1\uffff\2\5\1\uffff\4\5\1\uffff\2\5" +
        "\10\ufffc\10\ufffd\1\ufffa\1\11\2\ufffa\2\11\2\ufffa");

    /**
     * Expands a run-length encoded table: the strings hold (count, value)
     * char pairs, and each value is repeated {@code count} times.
     *
     * @param size number of entries in the decoded table
     * @param st   encoded data, possibly split over several strings
     * @return the decoded table
     */
    private static short[] unpack_vc_short(int size, String... st) {
        short[] res = new short[size];
        int t = 0;
        int count = 0;
        for (String s : st) {
            int slen = s.length();
            for (int i = 0; i < slen; ) {
                // A pending count survives only when a pair was split across two strings.
                count = i > 0 || count == 0 ? s.charAt(i++) : count;
                if (i < slen) {
                    short val = (short) s.charAt(i++);
                    while (count-- > 0) res[t++] = val;
                }
            }
        }
        assert res.length == t;
        return res;
    }

    /**
     * Maps a character to its class: 0 for end of input (-1), the table entry
     * for characters covered by {@link #tmCharClass}, and 1 for everything else.
     */
    private static int mapCharacter(int chr) {
        if (chr >= 0 && chr < tmCharClass.length) {
            return tmCharClass[chr];
        }
        return chr == -1 ? 0 : 1;
    }

    /**
     * Scans and returns the next token that is not skipped.
     *
     * <p>Invalid tokens are reported to the {@link ErrorReporter} and dropped.
     * If the input ends in the middle of a token, the problem is reported and
     * an end-of-input token (symbol 0) is returned.
     *
     * @return the next token; its {@code symbol} is 0 at end of input
     * @throws IOException if {@link #nextChar()} fails
     */
    public Span next() throws IOException {
        Span token = new Span();
        int state;

        tokenloop:
        do {
            token.offset = currOffset;
            tokenLine = token.line = currLine;
            if (tokenBuffer.length() > TOKEN_SIZE) {
                // Release the memory held by an unusually long previous token.
                tokenBuffer.setLength(TOKEN_SIZE);
                tokenBuffer.trimToSize();
            }
            tokenBuffer.setLength(0);

            for (state = this.state; state >= 0; ) {
                state = tmGoto[state * tmClassesCount + mapCharacter(chr)];
                if (state == -1 && chr == -1) {
                    token.symbol = 0;
                    token.value = null;
                    reporter.error("Unexpected end of input reached", token.line, token.offset);
                    token.offset = currOffset;
                    break tokenloop;
                }
                // State -1 (invalid token) still swallows the offending character
                // so that scanning can make progress.
                if (state >= -1 && chr != -1) {
                    consumeChar();
                }
            }
            token.symbol = tmRuleSymbol[-1 - state];
            token.value = null;

            if (token.symbol == -1) {
                // The line number is passed as text: a numeric argument would be
                // rendered with locale-specific grouping separators (e.g. "1,002").
                reporter.error(
                        MessageFormat.format("invalid token at line {0}: `{1}`, skipped",
                                Integer.toString(currLine), tokenText()),
                        token.line, token.offset);
            }
        } while (token.symbol == -1 || !createToken(token, -1 - state));
        return token;
    }

    /**
     * Post-processes a freshly matched token.
     *
     * @param token     the token being built
     * @param ruleIndex index of the lexer rule that matched
     * @return {@code true} if the token must be returned from {@link #next()},
     *         {@code false} if it must be skipped
     * @throws IOException declared for overriding implementations; this
     *                     implementation performs no input
     */
    protected boolean createToken(Span token, int ruleIndex) throws IOException {
        boolean spaceToken = false;
        switch (ruleIndex) {
            case 3: // _skip: /'([^\n\\']|\\.)*'/
                spaceToken = true;
                break;
            case 4: // _skip: /"([^\n\\"]|\\.)*"/
                spaceToken = true;
                break;
            case 5: // _skip: /[^'"{}]+/
                spaceToken = true;
                break;
        }
        return !(spaceToken);
    }

    /**
     * Decodes a table of ints, each stored as two chars (low half first).
     *
     * @param size number of entries in the decoded table
     * @param st   encoded data, possibly split over several strings
     * @return the decoded table
     */
    /* package */ static int[] unpack_int(int size, String... st) {
        int[] res = new int[size];
        boolean second = false;
        char first = 0;
        int t = 0;
        for (String s : st) {
            int slen = s.length();
            for (int i = 0; i < slen; i++) {
                if (second) {
                    res[t++] = (s.charAt(i) << 16) + first;
                } else {
                    first = s.charAt(i);
                }
                second = !second;
            }
        }
        assert !second;
        assert res.length == t;
        return res;
    }
}