/***** BEGIN LICENSE BLOCK *****
* Version: CPL 1.0/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Common Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.eclipse.org/legal/cpl-v10.html
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
* Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
* Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
* Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the CPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the CPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****/
package org.jruby.lexer.yacc;
import java.io.IOException;
import java.io.Reader;
import org.jruby.util.ByteList;
/**
* This class is what feeds the lexer. It is primarily a wrapper around a
* Reader that can unread() data back onto the source. Originally, I thought
* about using java.io.PushbackReader to handle read/unread, but I realized
* that some extremely pathological case could overflow its fixed-size pushback
* buffer. Better safe than sorry. I could have combined this implementation
* with a PushbackReader, but the added complexity did not seem worth it.
*
* @author enebo
*/
public class LexerSource {
    // Initial capacity of the pushback stack; also the amount it grows by.
    private static final int INITIAL_PUSHBACK_SIZE = 100;

    // Initial capacity of the line-width table; also the amount it grows by.
    private static final int INITIAL_LINEWIDTH_SIZE = 2048;

    // Marker meaning "no raw character is waiting in pendingRaw". It must differ from every
    // value Reader.read() can return (0..65535 and -1).
    private static final int NO_PENDING_CHAR = -2;

    // Where we get new positions from.
    private final ISourcePositionFactory positionFactory;

    // Where we get our newest chars.
    private final Reader reader;

    // Our readback/pushback buffer, used as a stack: unread() pushes, read() pops.
    private char[] buf = new char[INITIAL_PUSHBACK_SIZE];

    // Index of the top of the pushback stack (-1 when it is empty).
    private int bufLength = -1;

    // How long is every line we have run across. This makes it possible for us to unread()
    // past a read() line and still know what column we are at.
    private int[] lineWidths = new int[INITIAL_LINEWIDTH_SIZE];

    // Index of the last line width stored in lineWidths (-1 when none has been stored).
    private int lineWidthsLength = -1;

    // The name of this source (e.g. a filename: foo.rb)
    private final String sourceName;

    // Number of newlines read so far (decremented again when a newline is unread).
    private int line = 0;

    // Column of source.
    private int column = 0;

    // How many characters into the source are we?
    private int offset = 0;

    // Flag to let us know, on the next read after a newline, that we should reset column.
    private boolean nextCharIsOnANewLine = true;

    // Raw character fetched from the reader while looking at what follows a '\r'. It logically
    // sits between the pushback stack and the reader and is consumed by the next wrappedRead().
    private int pendingRaw = NO_PENDING_CHAR;

    /**
     * Create our food-source for the lexer, using a SourcePositionFactory bound to this source.
     *
     * @param sourceName is the file we are reading
     * @param reader is what represents the contents of file sourceName
     */
    public LexerSource(String sourceName, Reader reader) {
        this.sourceName = sourceName;
        this.reader = reader;
        this.positionFactory = new SourcePositionFactory(this);
    }

    /**
     * Create our food-source for the lexer with a caller-supplied position factory.
     *
     * @param sourceName is the file we are reading
     * @param reader is what represents the contents of file sourceName
     * @param factory is what getPosition() delegates to
     */
    public LexerSource(String sourceName, Reader reader, ISourcePositionFactory factory) {
        this.sourceName = sourceName;
        this.reader = reader;
        this.positionFactory = factory;
    }

    /**
     * Read the next character from this source. Characters that were pushed back with
     * unread() are returned first (most recently pushed first); after that characters come
     * from the reader with line endings translated to '\n'.
     *
     * @return the next character, or '\0' at end of input
     * @throws IOException if the underlying reader fails
     */
    public char read() throws IOException {
        // Decide this before wrappedRead() runs: a newline popped from the pushback stack has
        // already had its line width recorded, one coming from the reader has not.
        boolean fromPushback = bufLength >= 0;
        char c;

        if (fromPushback) {
            c = buf[bufLength--];
        } else {
            c = wrappedRead();

            // EOF...Do not advance column or offset.
            if (c == '\0') {
                return c;
            }
        }

        // Reset column back to zero on the first read of a line (note it will become '1' by
        // the time it leaves read()).
        if (nextCharIsOnANewLine) {
            nextCharIsOnANewLine = false;
            column = 0;
        }

        offset++;
        column++;

        if (c == '\n') {
            line++;

            // First time we see the end of this line: remember how wide it was.
            if (!fromPushback) {
                lineWidths[++lineWidthsLength] = column;

                // Grow the table as soon as it is full so the next store always fits.
                if (lineWidthsLength + 1 == lineWidths.length) {
                    int[] grown = new int[lineWidths.length + INITIAL_LINEWIDTH_SIZE];
                    System.arraycopy(lineWidths, 0, grown, 0, lineWidths.length);
                    lineWidths = grown;
                }
            }

            nextCharIsOnANewLine = true;
        }

        return c;
    }

    /**
     * Pushes a char back onto this source so that the next read() returns it. The character
     * is not checked against what was actually read, so anything may be pushed. Pushing '\0'
     * (the EOF marker) is a no-op.
     *
     * Pushing back '\n' restores the column from the recorded line widths, so it is only
     * valid while the line counter is positive.
     *
     * @param c to be put back onto the source
     */
    public void unread(char c) {
        if (c == '\0') {
            return;
        }

        offset--;

        if (c == '\n') {
            line--;
            column = lineWidths[line];
            nextCharIsOnANewLine = true;
        } else {
            column--;
        }

        buf[++bufLength] = c;

        // If we outgrow our pushback stack then grow it (this should only happen in
        // pretty pathological cases).
        if (bufLength + 1 == buf.length) {
            char[] grown = new char[buf.length + INITIAL_PUSHBACK_SIZE];
            System.arraycopy(buf, 0, grown, 0, buf.length);
            buf = grown;
        }
    }

    /**
     * Checks whether the next character equals the supplied one without consuming it.
     *
     * @param to the character to compare the next character with
     * @return true if the next character is 'to'
     * @throws IOException if the underlying reader fails
     */
    public boolean peek(char to) throws IOException {
        char next = read();
        unread(next);

        return next == to;
    }

    /**
     * What file are we lexing?
     *
     * @return the name this source was created with
     */
    public String getFilename() {
        return sourceName;
    }

    /**
     * What line are we at?
     *
     * @return the number of newlines consumed so far (0 while on the first line)
     */
    public int getLine() {
        return line;
    }

    /**
     * What column are we at?
     *
     * @return the current column (1 right after the first character of a line was read)
     */
    public int getColumn() {
        return column;
    }

    /**
     * How far into the source are we?
     *
     * @return the current offset, never less than zero
     */
    public int getOffset() {
        return Math.max(0, offset);
    }

    /**
     * Asks the position factory for a position, handing both arguments through unchanged.
     *
     * @param startPosition passed on to the position factory
     * @param inclusive passed on to the position factory
     * @return the position produced by the factory
     */
    public ISourcePosition getPosition(ISourcePosition startPosition, boolean inclusive) {
        return positionFactory.getPosition(startPosition, inclusive);
    }

    /**
     * Asks the position factory for a position with no start position and inclusive == false.
     *
     * @return the position produced by the factory
     */
    public ISourcePosition getPosition() {
        return positionFactory.getPosition(null, false);
    }

    /**
     * @return the factory this source obtains its positions from
     */
    public ISourcePositionFactory getPositionFactory() {
        return positionFactory;
    }

    /**
     * Reads one character from the reader and normalizes line endings: both "\r\n" and a
     * lone '\r' are delivered as a single '\n'. End of input is delivered as '\0'.
     *
     * Exceptions are not hidden; an IOException from the reader propagates to the caller.
     *
     * @return the next (normalized) character, or '\0' at end of input
     * @throws IOException if the underlying reader fails
     */
    private char wrappedRead() throws IOException {
        int c = nextRaw();

        if (c == '\r') {
            int following = nextRaw();

            if (following == '\n') {
                // Position within source must reflect the actual offset and column. Since we
                // ate an extra character here (this accounting is normally done in read()),
                // we update the position info ourselves.
                offset++;
                column++;
            } else {
                // Lone '\r': keep whatever followed it (possibly EOF) for the next call. It is
                // parked without touching offset/column because it has not been counted yet,
                // and it is re-examined by the next call so that a second '\r' is translated
                // as well.
                pendingRaw = following;
            }

            c = '\n';
        }

        return c == -1 ? '\0' : (char) c;
    }

    /**
     * Returns the raw character parked by wrappedRead(), if any, otherwise reads a fresh one
     * from the reader.
     *
     * @return a raw character value, or -1 at end of input
     * @throws IOException if the underlying reader fails
     */
    private int nextRaw() throws IOException {
        if (pendingRaw == NO_PENDING_CHAR) {
            return reader.read();
        }

        int parked = pendingRaw;
        pendingRaw = NO_PENDING_CHAR;

        return parked;
    }

    /**
     * Create a source.
     *
     * @param name the name of the source (e.g a filename: foo.rb)
     * @param content the data of the source
     * @return the new source
     */
    public static LexerSource getSource(String name, Reader content) {
        return new LexerSource(name, content);
    }

    /**
     * Consumes characters up to and including the next '\n' (or up to end of input).
     *
     * @return the characters consumed, without the terminating newline
     * @throws IOException if the underlying reader fails
     */
    public String readLine() throws IOException {
        StringBuffer text = new StringBuffer(80);
        char c = read();

        while (c != '\n' && c != '\0') {
            text.append(c);
            c = read();
        }

        return text.toString();
    }

    /**
     * Same as readLine(), but the consumed characters are appended to a ByteList.
     *
     * @return the characters consumed, without the terminating newline
     * @throws IOException if the underlying reader fails
     */
    public ByteList readLineBytes() throws IOException {
        ByteList bytelist = new ByteList(80);
        char c = read();

        while (c != '\n' && c != '\0') {
            bytelist.append(c);
            c = read();
        }

        return bytelist;
    }

    /**
     * Pushes a whole sequence back so that subsequent reads return it in its original order.
     *
     * @param buffer the characters to push back
     */
    public void unreadMany(CharSequence buffer) {
        // The pushback buffer is a stack, so the last character has to go in first.
        for (int i = buffer.length() - 1; i >= 0; i--) {
            unread(buffer.charAt(i));
        }
    }

    /**
     * Tries to consume the given string from the source. On success the string (and any
     * skipped leading whitespace) stays consumed; on failure everything read is pushed back.
     *
     * @param match the text expected next
     * @param indent whether leading whitespace may be skipped before the match
     * @return true if the source continued with 'match'
     * @throws IOException if the underlying reader fails
     */
    public boolean matchString(String match, boolean indent) throws IOException {
        int length = match.length();
        StringBuffer consumed = new StringBuffer(length + 20);

        if (indent) {
            for (char c = read(); c != '\0'; c = read()) {
                if (!Character.isWhitespace(c)) {
                    unread(c);
                    break;
                }
                consumed.append(c);
            }
        }

        for (int i = 0; i < length; i++) {
            char c = read();
            consumed.append(c);

            if (c != match.charAt(i)) {
                unreadMany(consumed);
                return false;
            }
        }

        return true;
    }

    /**
     * @return true if the current column is 1
     */
    public boolean wasBeginOfLine() {
        return getColumn() == 1;
    }

    /**
     * Reads the remainder of a backslash escape (the backslash itself must already have been
     * consumed) and returns the character it denotes.
     *
     * @return the character the escape sequence stands for
     * @throws IOException if the underlying reader fails
     * @throws SyntaxException if the escape is malformed or cut off by end of input
     */
    public char readEscape() throws IOException {
        char c = read();

        switch (c) {
            case '\\' : // backslash
                return c;
            case 'n' : // newline
                return '\n';
            case 't' : // horizontal tab
                return '\t';
            case 'r' : // carriage return
                return '\r';
            case 'f' : // form feed
                return '\f';
            case 'v' : // vertical tab
                return '\u000B';
            case 'a' : // alarm(bell)
                return '\u0007';
            case 'e' : // escape
                return '\u001B';
            case '0' : case '1' : case '2' : case '3' : // octal constant
            case '4' : case '5' : case '6' : case '7' :
                // The digit we just read is part of the value; let scanOct see it again.
                unread(c);
                return scanOct(3);
            case 'x' : { // hex constant
                // Compare raw offsets rather than columns: pushing back a newline restores the
                // column from the line-width table, which would hide the fact that scanHex
                // consumed nothing.
                int offsetBeforeDigits = offset;
                char hexValue = scanHex(2);

                // No hex value after the 'x'.
                if (offsetBeforeDigits == offset) {
                    throw new SyntaxException(getPosition(), "Invalid escape character syntax");
                }
                return hexValue;
            }
            case 'b' : // backspace
                return '\010';
            case 's' : // space
                return ' ';
            case 'M' : // meta: M-x or M-(escape), sets the high bit
                if (read() != '-') {
                    throw new SyntaxException(getPosition(), "Invalid escape character syntax");
                }
                c = read();
                if (c == '\\') {
                    return (char) (readEscape() | 0x80);
                }
                if (c == '\0') {
                    throw new SyntaxException(getPosition(), "Invalid escape character syntax");
                }
                return (char) ((c & 0xff) | 0x80);
            case 'C' : // control, long form: C-x
                if (read() != '-') {
                    throw new SyntaxException(getPosition(), "Invalid escape character syntax");
                }
                // Deliberate fall through: the rest is identical to the short form.
            case 'c' : // control, short form: cx
                c = read();
                if (c == '\\') {
                    c = readEscape();
                } else if (c == '?') {
                    // Control-? is DEL: octal 177 (0x7F).
                    return '\177';
                } else if (c == '\0') {
                    throw new SyntaxException(getPosition(), "Invalid escape character syntax");
                }
                return (char) (c & 0x9f);
            case '\0' : // end of input right after the backslash
                throw new SyntaxException(getPosition(), "Invalid escape character syntax");
            default :
                // Any other character escapes to itself.
                return c;
        }
    }

    /**
     * Reads up to 'count' hexadecimal digits and returns their value. Stops at (and pushes
     * back) the first character that is not a hex digit.
     *
     * @param count maximum number of digits to consume
     * @return the accumulated value ('\0' if no digit was read)
     * @throws IOException if the underlying reader fails
     */
    private char scanHex(int count) throws IOException {
        char value = '\0';

        for (int i = 0; i < count; i++) {
            char c = read();

            if (!RubyYaccLexer.isHexChar(c)) {
                unread(c);
                break;
            }

            value <<= 4;
            value |= Character.digit(c, 16) & 15;
        }

        return value;
    }

    /**
     * Reads up to 'count' octal digits and returns their value. Stops at (and pushes back)
     * the first character that is not an octal digit.
     *
     * @param count maximum number of digits to consume
     * @return the accumulated value ('\0' if no digit was read)
     * @throws IOException if the underlying reader fails
     */
    private char scanOct(int count) throws IOException {
        char value = '\0';

        for (int i = 0; i < count; i++) {
            char c = read();

            if (!RubyYaccLexer.isOctChar(c)) {
                unread(c);
                break;
            }

            value <<= 3;
            value |= Character.digit(c, 8);
        }

        return value;
    }

    /**
     * Looks ahead without consuming anything: returns the character 'anOffset' positions
     * ahead of the current position (1 is the character the next read() would return).
     *
     * @param anOffset is location past current position to get char at
     * @return the character that many positions ahead, or '\0' if that is past the end of
     *         input or anOffset is not positive
     * @throws IOException if the underlying reader fails
     */
    public char getCharAt(int anOffset) throws IOException {
        if (anOffset <= 0) {
            return '\0';
        }

        StringBuffer lookahead = new StringBuffer(anOffset);

        // Read the next anOffset chars; past the end of input read() keeps yielding '\0'.
        for (int i = 0; i < anOffset; i++) {
            lookahead.append(read());
        }

        // Push them back last-to-first so they are read again in their original order
        // (unread() ignores the '\0' EOF markers).
        unreadMany(lookahead);

        return lookahead.charAt(anOffset - 1);
    }

    /**
     * Debugging aid: shows the next 20 characters of the source followed by " ...". The
     * characters are pushed back afterwards, so the source can still be read as before.
     *
     * @return a preview of the upcoming input; never null
     */
    public String toString() {
        StringBuffer preview = new StringBuffer(24);

        try {
            for (int i = 0; i < 20; i++) {
                preview.append(read());
            }
        } catch (Exception e) {
            // Deliberate best effort: a failing preview must not break whoever is printing
            // this object. Fall through and show what was read before the failure.
        }

        // Restore everything we consumed, including after a failure part-way through.
        unreadMany(preview);
        preview.append(" ...");

        return preview.toString();
    }
}