/*
* Copyright (c) 2011, IETR/INSA of Rennes
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of the IETR/INSA of Rennes nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
package net.sf.orcc.util.sexp;
/**
 * This class defines a parser of S-Expressions.
 * <p>
 * The parser works on an in-memory copy of the text and keeps a cursor, so
 * successive calls to {@link #read()} return successive top-level
 * expressions. Whitespace (space, tab, CR, LF) and <code>;</code> line
 * comments are skipped between expressions. Lists are delimited by
 * parentheses, strings by double quotes, and a back-slash escapes the next
 * character inside strings and tokens. The multiple escape character
 * <code>|</code> is not supported.
 * </p>
 * 
 * @author Matthieu Wipliez
 * 
 */
public class SExpParser {

	/**
	 * Constituent table for the printable ASCII range: entry <code>i</code>
	 * describes the character whose code is <code>i + 32</code>, from space
	 * (32) up to and including <code>~</code> (126).
	 */
	private static final boolean[] constituent = { false, // space
			true, // !
			false, // "
			true, // #
			true, // $
			true, // %
			true, // &
			false, // '
			false, // (
			false, // )
			true, // *
			true, // +
			false, // ,
			true, // -
			true, // .
			true, // /
			true, // 0
			true, // 1
			true, // 2
			true, // 3
			true, // 4
			true, // 5
			true, // 6
			true, // 7
			true, // 8
			true, // 9
			true, // :
			false, // ;
			true, // <
			true, // =
			true, // >
			true, // ?
			true, // @
			true, // A
			true, // B
			true, // C
			true, // D
			true, // E
			true, // F
			true, // G
			true, // H
			true, // I
			true, // J
			true, // K
			true, // L
			true, // M
			true, // N
			true, // O
			true, // P
			true, // Q
			true, // R
			true, // S
			true, // T
			true, // U
			true, // V
			true, // W
			true, // X
			true, // Y
			true, // Z
			true, // [
			false, // \
			true, // ]
			true, // ^
			true, // _
			false, // `
			true, // a
			true, // b
			true, // c
			true, // d
			true, // e
			true, // f
			true, // g
			true, // h
			true, // i
			true, // j
			true, // k
			true, // l
			true, // m
			true, // n
			true, // o
			true, // p
			true, // q
			true, // r
			true, // s
			true, // t
			true, // u
			true, // v
			true, // w
			true, // x
			true, // y
			true, // z
			true, // {
			false, // |
			true, // }
			true // ~
	};

	/** Index in {@link #stream} of the next character to read. */
	private int pos;

	/** Characters of the text being parsed. */
	private final char[] stream;

	/**
	 * Creates a new s-expression parser.
	 * 
	 * @param contents
	 *            contents to parse
	 * @throws NullPointerException
	 *             if <code>contents</code> is <code>null</code>
	 */
	public SExpParser(String contents) {
		this.stream = contents.toCharArray();
	}

	/**
	 * Tells whether the given character is a constituent, i.e. may be part of
	 * a token.
	 * 
	 * @param x
	 *            a character
	 * @return <code>true</code> if the table marks <code>x</code> as a
	 *         constituent
	 * @throws IllegalArgumentException
	 *             if <code>x</code> lies outside the range covered by the
	 *             table (space to <code>~</code>)
	 */
	private boolean isConstituent(char x) {
		// derive the bound from the table itself so that DEL (127), which has
		// no entry, is rejected instead of overrunning the array
		int index = x - 32;
		if (index < 0 || index >= constituent.length) {
			throw new IllegalArgumentException("illegal character " + x);
		}
		return constituent[index];
	}

	/**
	 * Reads an s-expression from the stream.
	 * 
	 * @return an s-expression, or <code>null</code> if only whitespace and
	 *         comments remain before the end of the stream
	 * @throws IllegalStateException
	 *             if the input is malformed (unexpected character, or end of
	 *             input inside a list, a string or after a back-slash)
	 * @throws IllegalArgumentException
	 *             if a character outside the supported ASCII range is found
	 *             where a token is expected
	 * @throws UnsupportedOperationException
	 *             if the multiple escape character <code>|</code> is found
	 */
	public SExp read() {
		skipBlanks();
		if (pos >= stream.length) {
			return null;
		}

		char x = stream[pos++];
		switch (x) {
		case '(':
			return readList();
		case '"':
			return readString();
		case '\\':
			return readTokenSingleEscape();
		case '|':
			throw new UnsupportedOperationException(
					"multiple escape not yet implemented");
		default:
			if (isConstituent(x)) {
				return readToken(x);
			}
			throw new IllegalStateException("unexpected character " + x);
		}
	}

	/**
	 * Parses a list of s-expressions. The opening parenthesis has already
	 * been consumed by the caller; this method consumes everything up to and
	 * including the matching closing parenthesis.
	 * 
	 * @return an SExprList
	 * @throws IllegalStateException
	 *             if the end of the stream is reached before the closing
	 *             parenthesis
	 */
	private SExpList readList() {
		SExpList list = new SExpList();
		while (true) {
			// blanks and comments may legally appear before an element or
			// before the closing parenthesis, so skip them before looking
			skipBlanks();
			if (pos >= stream.length) {
				throw new IllegalStateException(
						"unexpected end of file when parsing list");
			}

			if (stream[pos] == ')') {
				pos++;
				return list;
			}

			// a significant character is pending, so read() cannot return
			// null here: it either yields an element or throws
			list.getExpressions().add(read());
		}
	}

	/**
	 * Parses a string. The opening double quote has already been consumed by
	 * the caller. A back-slash makes the following character part of the
	 * string verbatim, so an escaped double quote does not end the string.
	 * 
	 * @return a SExprAtom
	 * @throws IllegalStateException
	 *             if the end of the stream is reached before the closing
	 *             double quote
	 */
	private SExpAtom readString() {
		StringBuilder builder = new StringBuilder();
		while (pos < stream.length) {
			char c = stream[pos++];
			if (c == '"') {
				// unescaped quote: end of string
				return new SExpString(builder.toString());
			}

			if (c == '\\') {
				if (pos >= stream.length) {
					throw new IllegalStateException(
							"unexpected back-slash at the end of file");
				}
				// take the escaped character as is
				c = stream[pos++];
			}
			builder.append(c);
		}

		throw new IllegalStateException(
				"unexpected end of file when parsing String");
	}

	/**
	 * Reads a token starting with the given character. Reading stops at the
	 * end of the stream, at a whitespace character (which is consumed) or at
	 * a terminating macro character (which is left in the stream).
	 * 
	 * @param y
	 *            a character starting the token
	 * @return an atomic s-expression
	 * @throws IllegalStateException
	 *             if a back-slash ends the stream or a non-constituent
	 *             character is found
	 * @throws UnsupportedOperationException
	 *             if the multiple escape character <code>|</code> is found
	 */
	private SExpAtom readToken(char y) {
		StringBuilder builder = new StringBuilder();
		builder.append(y);

		boolean keep = true;
		while (pos < stream.length && keep) {
			y = stream[pos++];
			switch (y) {
			case '\\':
				if (pos >= stream.length) {
					throw new IllegalStateException(
							"unexpected back-slash at the end of file");
				}
				// single escape: the next character is taken verbatim
				builder.append(stream[pos++]);
				break;
			case '|':
				throw new UnsupportedOperationException(
						"multiple escape not yet implemented");
			case '"':
			case '\'':
			case '(':
			case ')':
			case ',':
			case ';':
			case '`':
				// terminating macro character => unread char
				pos--;
				keep = false;
				break;
			case ' ':
			case '\t':
			case '\r':
			case '\n':
				// whitespace ends the token and is consumed
				keep = false;
				break;
			default:
				if (isConstituent(y)) {
					builder.append(y);
				} else {
					throw new IllegalStateException("unexpected character " + y);
				}
			}
		}

		return new SExpSymbol(builder.toString());
	}

	/**
	 * Reads a token starting with a single escape. The back-slash has already
	 * been consumed; the character that follows it becomes the first
	 * character of the token without being checked.
	 * 
	 * @return an atomic s-expression
	 * @throws IllegalStateException
	 *             if the back-slash is the last character of the stream
	 */
	private SExpAtom readTokenSingleEscape() {
		if (pos >= stream.length) {
			throw new IllegalStateException(
					"unexpected back-slash at the end of file");
		}
		return readToken(stream[pos++]);
	}

	/**
	 * Skips whitespace (space, tab, CR, LF) and <code>;</code> comments,
	 * leaving the cursor on the next significant character or at the end of
	 * the stream.
	 */
	private void skipBlanks() {
		while (pos < stream.length) {
			switch (stream[pos]) {
			case ' ':
			case '\t':
			case '\r':
			case '\n':
				pos++;
				break;
			case ';':
				pos++;
				skipComment();
				break;
			default:
				return;
			}
		}
	}

	/**
	 * Skips characters until \r or \n is encountered (that character is
	 * consumed too) or the end of the stream is reached.
	 */
	private void skipComment() {
		while (pos < stream.length) {
			char c = stream[pos++];
			if (c == '\r' || c == '\n') {
				return;
			}
		}
	}

}