/*
* Created on Mar 19, 2003
*
* @author henkel@cs.colorado.edu
*
*/
package bibtex.parser;
import java.io.IOException;
import java.io.Reader;
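/**
* This is not a real lexer, since BibTeX is such an insane format. Instead of
* producing a token stream, it offers scan methods, each of which reads one
* specific construct from the input.
*
* @author henkel
*/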
final class PseudoLexer {
/**
 * Immutable value describing the outcome of one scan step: a discriminator,
 * the associated text and the input position.
 */
static final class Token {
    /** Discriminator; its meaning is defined by the method that created the token. */
    final int choice;
    /** Text carried by the token; the end-of-input marker carries {@code null}. */
    final String content;
    /** Line number supplied when the token was created. */
    final int line;
    /** Column number supplied when the token was created. */
    final int column;

    /**
     * @param choice discriminator value
     * @param content text carried by the token, may be {@code null}
     * @param line line number to record
     * @param column column number to record
     */
    Token(int choice, String content, int line, int column) {
        this.line = line;
        this.column = column;
        this.choice = choice;
        this.content = content;
    }
}
/** Wrapped character source; exposes the current character, its position and an EOF flag. */
private final LookAheadReader input;

/** Non-null once the end of the input has been observed; records where it was seen. */
private Token eofToken;

/**
 * Creates a lexer that reads from the given character stream.
 *
 * @param input the reader to wrap in a {@code LookAheadReader}
 * @throws IOException propagated from the {@code LookAheadReader} constructor
 */
PseudoLexer(Reader input) throws IOException {
    this.input = new LookAheadReader(input);
}
/**
 * Performs no scanning and always returns {@code null}.
 *
 * NOTE(review): this looks like an unused stub; confirm before relying on it.
 *
 * @return always {@code null}
 */
Token getNextToken() {
return null;
}
/**
 * Scans the next top-level element after skipping leading whitespace.
 *
 * The {@code choice} of the returned token is
 * <ul>
 * <li>0 for a top-level comment: all text up to, but not including, the next
 * {@code '@'} or the end of the input,</li>
 * <li>1 for an {@code '@'}, which is consumed,</li>
 * <li>2 when the end of the input has been reached.</li>
 * </ul>
 *
 * @return the token describing what was found
 * @throws IOException if reading from the input fails
 */
public Token scanTopLevelCommentOrAtOrEOF() throws IOException {
    skipWhitespace();
    if (eofToken != null) {
        return new Token(2, eofToken.content, eofToken.line, eofToken.column);
    }

    final int startLine = input.getLine();
    final int startColumn = input.getColumn();

    if (input.getCurrent() == '@') {
        input.step();
        return new Token(1, "@", startLine, startColumn);
    }

    // Anything that is not an '@' is free text; collect it up to the next '@' or EOF.
    final StringBuffer comment = new StringBuffer();
    while (true) {
        if (input.eof()) {
            break;
        }
        final char current = input.getCurrent();
        if (current == '@') {
            break;
        }
        comment.append(current);
        input.step();
    }
    return new Token(0, comment.toString(), startLine, startColumn);
}
/**
 * Skips whitespace and matches the current character against a set of
 * alternatives.
 *
 * Unless {@code lookAhead} is true, the current character is consumed,
 * whether or not it matched.
 *
 * @param alternatives the characters that are acceptable at this position
 * @param lookAhead if true, the input position is not advanced
 * @return the index into {@code alternatives} of the first matching entry
 * @throws ParseException if the end of the input has been reached or the
 *         current character is not one of the alternatives
 * @throws IOException if reading from the input fails
 */
public final int scanAlternatives(char[] alternatives, boolean lookAhead)
        throws IOException, ParseException {
    skipWhitespace();
    if (eofToken != null) {
        throw new ParseException(
                eofToken.line,
                eofToken.column,
                "[EOF]",
                alternativesToString(alternatives));
    }
    final int line = input.getLine(), column = input.getColumn();
    // Capture the character before stepping so that a failure reports the
    // offending character rather than the one that follows it.
    final char encountered = input.getCurrent();
    final int match = index(alternatives, encountered);
    if (!lookAhead) {
        input.step();
    }
    if (match >= 0) {
        return match;
    }
    throw new ParseException(line, column, "" + encountered, alternativesToString(alternatives));
}
// /**
// * this one is case insensitive!
// *
// * @param alternatives
// * @return Token
// * @throws ParseException
// * @throws IOException
// */
//
// public final Token scanAlternatives(String[] alternatives)
// throws ParseException, IOException {
// skipWhitespace();
// if (eofToken != null)
// throw new ParseException(
// eofToken.line,
// eofToken.column,
// "[EOF]",
// alternativesToString(alternatives));
// final int line = input.getLine();
// final int column = input.getColumn();
// HashMap amap = new HashMap();
// int maxLength = 0;
// for (int i = 0; i < alternatives.length; i++) {
// amap.put(alternatives[i], new Integer(i));
// if (alternatives[i].length() > maxLength)
// maxLength = alternatives[i].length();
// }
// String content = "";
// String lowerCaseContent = "";
// for (int length = 1; length <= maxLength; length++) {
// content += input.getCurrent();
// lowerCaseContent += Character.toLowerCase(input.getCurrent());
// input.step();
//
// if (amap.containsKey(lowerCaseContent)) {
// return new Token(
// ((Integer) amap.get(lowerCaseContent)).intValue(),
// content,
// line,
// column);
// }
// }
// throw new ParseException(line, column, content,
// alternativesToString(alternatives));
// }
/**
 * Scans a literal into a fresh buffer and returns the collected text.
 * Delegates to the four-argument {@code scanLiteral}; the index it returns is
 * discarded.
 *
 * @param terminationSet characters that end the literal
 * @param excludeWhitespace passed through to the four-argument variant
 * @param enforceNonzero passed through to the four-argument variant
 * @return the scanned text
 * @throws ParseException propagated from the four-argument variant
 * @throws IOException if reading from the input fails
 */
public String scanLiteral(char[] terminationSet, boolean excludeWhitespace, boolean enforceNonzero)
        throws ParseException, IOException {
    final StringBuffer collected = new StringBuffer();
    this.scanLiteral(terminationSet, excludeWhitespace, enforceNonzero, collected);
    final String literal = collected.toString();
    return literal;
}
/**
 * Appends characters from the input to {@code resultTargetBuffer} until a
 * character from {@code terminationSet}, the end of the input or, if
 * {@code excludeWhitespace} is set, a whitespace character is reached. The
 * terminating character itself is not consumed.
 *
 * @param terminationSet characters that end the literal
 * @param excludeWhitespace if true, leading whitespace is skipped and any
 *        whitespace character also ends the literal
 * @param enforceNonzero if true, scanning zero characters is an error
 * @param resultTargetBuffer buffer the scanned characters are appended to
 * @return the index into {@code terminationSet} of the character that ended
 *         the literal, or -1 if it was ended by whitespace or the end of the
 *         input
 * @throws ParseException if the input is already at its end, or if
 *         {@code enforceNonzero} is set and no character was scanned
 * @throws IOException if reading from the input fails
 */
public int scanLiteral(
        char[] terminationSet,
        boolean excludeWhitespace,
        boolean enforceNonzero,
        StringBuffer resultTargetBuffer)
        throws ParseException, IOException {
    // Build the description once so both error paths agree; whitespace is
    // only mentioned when it actually terminates the literal.
    final String expected = excludeWhitespace
            ? "not (" + alternativesToString(terminationSet) + " or [whitespace])"
            : "not (" + alternativesToString(terminationSet) + ")";

    // Skips whitespace only when requested, then fails on end of input.
    enforceNoEof(expected, excludeWhitespace);

    final int line = input.getLine(), column = input.getColumn();
    final int initialResultTargetBufferLength = resultTargetBuffer.length();
    int indexIntoTerminationSet = -1;
    while (!input.eof()) {
        final char inputChar = input.getCurrent();
        indexIntoTerminationSet = index(terminationSet, inputChar);
        if (indexIntoTerminationSet >= 0
                || (excludeWhitespace && Character.isWhitespace(inputChar))) {
            break;
        }
        input.step();
        resultTargetBuffer.append(inputChar);
    }

    final boolean scannedSomething = resultTargetBuffer.length() > initialResultTargetBufferLength;
    if (enforceNonzero && !scannedSomething) {
        // Nothing was consumed, so the input still points at the offending character.
        throw new ParseException(line, column, "" + input.getCurrent(), expected);
    }
    return indexIntoTerminationSet;
}
/** Terminators inside a quoted string: index 0 is the closing quote, index 1 opens a brace group. */
private static final char[] QUOTE_OR_LBRACE = new char[] { '\"', '{' };

/**
 * Scans a string delimited by double quotes. Brace groups inside the string
 * are scanned recursively and kept in the result including their braces, so
 * a double quote inside braces does not end the string.
 *
 * @return the text between the quotes, without the quotes themselves
 * @throws ParseException if the opening or closing quote is missing, a brace
 *         group is malformed, or the input ends prematurely
 * @throws IOException if reading from the input fails
 */
public String scanQuotedString() throws IOException, ParseException {
    final StringBuffer content = new StringBuffer();
    scan('"');
    // Only index 1 ('{') continues the string. Index 0 is the closing quote,
    // and -1 means the input ended; in both cases the scan('"') below either
    // consumes the quote or reports that it is missing.
    while (this.scanLiteral(QUOTE_OR_LBRACE, false, false, content) == 1) {
        scanBracketedString(content, true);
    }
    scan('"');
    return content.toString();
}
/** Terminators inside a brace group: index 0 closes the group, index 1 opens a nested one. */
private static final char[] RBRACE_LBRACE = new char[] { '}', '{' };

/**
 * Scans a brace-delimited group, including any nested groups, and appends
 * its text to {@code targetBuffer}. Nested groups always keep their braces.
 *
 * @param targetBuffer buffer the scanned text is appended to
 * @param includeOuterBraces if true, the outermost braces are appended too
 * @throws ParseException if the opening or closing brace is missing or the
 *         input ends prematurely
 * @throws IOException if reading from the input fails
 */
public void scanBracketedString(StringBuffer targetBuffer, boolean includeOuterBraces)
        throws ParseException, IOException {
    scan('{');
    if (includeOuterBraces) {
        targetBuffer.append('{');
    }
    // Only index 1 ('{') opens a nested group. Index 0 is the closing brace,
    // and -1 means the input ended; in both cases the scan('}') below either
    // consumes the brace or reports that it is missing.
    while (this.scanLiteral(RBRACE_LBRACE, false, false, targetBuffer) == 1) {
        scanBracketedString(targetBuffer, true);
    }
    scan('}');
    if (includeOuterBraces) {
        targetBuffer.append('}');
    }
}
/**
 * Skips whitespace and scans a non-empty run of ASCII letters.
 *
 * @return the scanned letters
 * @throws ParseException if the end of the input is reached before a
 *         non-letter character is seen, or if the first character is not
 *         an ASCII letter
 * @throws IOException if reading from the input fails
 */
public String scanEntryTypeName() throws ParseException, IOException {
    final String expected = "[a..z,A..Z]";
    skipWhitespace();
    if (eofToken != null) {
        throw new ParseException(eofToken.line, eofToken.column, "[EOF]", expected);
    }

    final int startLine = input.getLine();
    final int startColumn = input.getColumn();
    final StringBuffer name = new StringBuffer();
    for (;;) {
        // The name must be followed by some other character, so EOF is an error here.
        enforceNoEof(expected, false);
        final char candidate = input.getCurrent();
        final boolean lowerCase = 'a' <= candidate && candidate <= 'z';
        final boolean upperCase = 'A' <= candidate && candidate <= 'Z';
        if (!lowerCase && !upperCase) {
            break;
        }
        name.append(candidate);
        input.step();
    }

    if (name.length() > 0) {
        return name.toString();
    }
    throw new ParseException(startLine, startColumn, "" + input.getCurrent(), expected);
}
/**
 * Skips whitespace and consumes exactly one character, which must equal
 * {@code expected}. The character is consumed even when it does not match.
 *
 * @param expected the character required at this position
 * @throws ParseException if the end of the input has been reached or a
 *         different character is found
 * @throws IOException if reading from the input fails
 */
public void scan(char expected) throws ParseException, IOException {
    skipWhitespace();
    if (eofToken != null) {
        throw new ParseException(eofToken.line, eofToken.column, "[EOF]", "" + expected);
    }

    final char encountered = input.getCurrent();
    if (encountered == expected) {
        input.step();
        return;
    }

    // Remember where the mismatch happened before moving past it.
    final int line = input.getLine();
    final int column = input.getColumn();
    input.step();
    throw new ParseException(line, column, "" + encountered, "" + expected);
}
/**
 * Advances the input past any whitespace. If the end of the input is reached,
 * the position is recorded in {@code eofToken}. Does nothing once
 * {@code eofToken} is set.
 *
 * @throws IOException if reading from the input fails
 */
public void skipWhitespace() throws IOException {
    if (eofToken != null) {
        return;
    }
    while (true) {
        if (input.eof()) {
            eofToken = new Token(-1, null, input.getLine(), input.getColumn());
            return;
        }
        if (!Character.isWhitespace(input.getCurrent())) {
            return;
        }
        input.step();
    }
}
/**
 * Fails with a {@code ParseException} if the end of the input has been
 * reached.
 *
 * @param expected description of what was expected, used in the exception
 * @param skipWhiteSpace if true, whitespace is skipped before the check;
 *        otherwise only the current input position is inspected
 * @throws ParseException if the end of the input has been reached
 * @throws IOException if reading from the input fails
 */
public void enforceNoEof(String expected, boolean skipWhiteSpace) throws ParseException, IOException {
    if (skipWhiteSpace) {
        skipWhitespace();
    } else if (input.eof()) {
        // Record the end-of-input position without consuming anything.
        eofToken = new Token(-1, null, input.getLine(), input.getColumn());
    }

    if (eofToken == null) {
        return;
    }
    throw new ParseException(eofToken.line, eofToken.column, "[EOF]", "" + expected);
}
/**
 * Returns the character at the current input position without consuming it.
 * Call {@code enforceNoEof} first to make sure the input has not ended.
 *
 * @return the current input character
 */
public char currentInputChar() {
    final char current = input.getCurrent();
    return current;
}
/**
 * Formats the characters as {@code one of 'a','b',...} for use in error
 * messages.
 *
 * @param alternatives the characters to list
 * @return the formatted list
 */
private static String alternativesToString(char[] alternatives) {
    final StringBuffer text = new StringBuffer("one of ");
    for (int pos = 0; pos < alternatives.length; pos++) {
        if (pos > 0) {
            text.append(',');
        }
        text.append('\'').append(alternatives[pos]).append('\'');
    }
    return text.toString();
}
/**
 * Formats the objects as {@code one of 'x','y',...} for use in error
 * messages; each element is rendered through its string conversion.
 *
 * @param alternatives the objects to list
 * @return the formatted list
 */
private static String alternativesToString(Object[] alternatives) {
    final StringBuffer text = new StringBuffer("one of ");
    for (int pos = 0; pos < alternatives.length; pos++) {
        if (pos > 0) {
            text.append(',');
        }
        text.append('\'').append(alternatives[pos]).append('\'');
    }
    return text.toString();
}
/**
 * Finds the first occurrence of a character in an array.
 *
 * @param container the array to search
 * @param element the character to look for
 * @return the index of the first occurrence, or -1 if there is none
 */
private static int index(char[] container, char element) {
    int position = 0;
    while (position < container.length && container[position] != element) {
        position++;
    }
    return position < container.length ? position : -1;
}
}