/*
*
* Copyright 2012 lexergen.
* This file is part of lexergen.
*
* lexergen is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* lexergen is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with lexergen. If not, see <http://www.gnu.org/licenses/>.
*
* lexergen:
* A tool to chunk source code into tokens for further processing in a compiler chain.
*
* Projectgroup: bi, bii
*
* Authors: Johannes Dahlke
*
* Module: Softwareprojekt Übersetzerbau 2012
*
* Created: Apr. 2012
* Version: 1.0
*
*/
package de.fuberlin.bii.bufferedreader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import de.fuberlin.bii.utils.Notification;
/**
* Input reader that works with a pair of buffers and sentinels,
* as described in the Dragon Book.
*
* TODO: test it. The reader has not been fully tested yet.
*
* @author Johannes Dahlke
*
*/
public class BufferedLexemeReader implements LexemeReader {

  /** Capacity of each of the two buffers, including the slot reserved for the sentinel. */
  private static final int NUM_BUFFER_SIZE = 1024;

  /** Maximum number of input characters held by one buffer (one slot is kept for the sentinel). */
  private static final int BLOCK_SIZE = NUM_BUFFER_SIZE - 1;

  /**
   * Sentinel written behind the last character of every loaded block and returned by
   * {@link #getNextChar()} at the end of the input. A character with this value inside
   * the input itself is therefore indistinguishable from the end of the input.
   */
  private static final char EOF_CHAR = 0x1A;

  /** Path of the file to read; used to (re)open the reader and in error messages. */
  private final String sourceFile;

  /**
   * The buffer pair. The block with index {@code b} (counted from the start of the file,
   * {@link #BLOCK_SIZE} characters each) lives in {@code buffers[b % 2]}.
   */
  private final char[][] buffers = new char[2][NUM_BUFFER_SIZE];

  /** Open reader on the source file, or {@code null} once it has been closed. */
  private FileReader fileReader;

  /** Number of blocks read from the file so far; the last two of them are resident. */
  private long loadedBlocks;

  /** {@code true} once the underlying reader has reported the end of the file. */
  private boolean inputExhausted;

  /** Absolute offset (in characters from the start of the file) just behind the last accepted lexeme. */
  private long lexemeBeginMarker;

  /** Absolute offset (in characters from the start of the file) of the next character to return. */
  private long forwardPosition;

  /**
   * Creates a reader for the given file and loads the first block.
   *
   * @param sourceFile path of the file to read.
   * @throws LexemeReaderException if the file cannot be opened or read.
   */
  public BufferedLexemeReader( String sourceFile) throws LexemeReaderException {
    this.sourceFile = sourceFile;
    // Deliberately not calling the overridable reopen() from the constructor.
    open();
  }

  /**
   * Closes a previously opened reader, rewinds all positions to the start of the file,
   * opens the source file and loads its first block.
   */
  private void open() throws LexemeReaderException {
    closeReader();
    loadedBlocks = 0;
    inputExhausted = false;
    lexemeBeginMarker = 0;
    forwardPosition = 0;
    try {
      // we open the file read only
      fileReader = new FileReader( new File( sourceFile));
    } catch ( FileNotFoundException e) {
      Notification.printDebugException( e);
      throw new LexemeReaderException( String.format("Cannot open the source file '%s'.", sourceFile));
    }
    loadNextBlock();
  }

  /** Closes the underlying reader if it is open; a failure to close is only reported as debug output. */
  private void closeReader() {
    if ( fileReader == null) {
      return;
    }
    try {
      fileReader.close();
    } catch ( IOException e) {
      Notification.printDebugException( e);
    }
    fileReader = null;
  }

  /**
   * Reads the next block of the file into the buffer that currently holds the older of the
   * two resident blocks and terminates it with the sentinel.
   *
   * @throws LexemeReaderException if the pending lexeme still starts in the block that would
   *         be overwritten, if the file is not open, or if reading fails.
   */
  private void loadNextBlock() throws LexemeReaderException {
    // Loading block n overwrites block n-2, so the pending lexeme must start in block n-1 or later.
    if ( lexemeBeginMarker < (loadedBlocks - 1) * BLOCK_SIZE) {
      throw new LexemeReaderException( "Oversized lexeme. Accept lexeme first.");
    }
    if ( fileReader == null && !inputExhausted) {
      throw new LexemeReaderException( String.format("The source file '%s' is not open.", sourceFile));
    }
    char[] target = buffers[(int) (loadedBlocks % 2)];
    int filled = 0;
    try {
      // A single read() may deliver fewer characters than requested without being at the
      // end of the file, so keep reading until the block is full or the file really ends.
      while ( !inputExhausted && filled < BLOCK_SIZE) {
        int count = fileReader.read( target, filled, BLOCK_SIZE - filled);
        if ( count < 0) {
          inputExhausted = true;
        } else {
          filled += count;
        }
      }
    } catch ( IOException e) {
      Notification.printDebugException( e);
      closeReader();
      throw new LexemeReaderException( String.format("Cannot read from the source file '%s'.", sourceFile));
    }
    // Sentinel: marks the end of the file for a short block, the end of the buffer otherwise.
    target[filled] = EOF_CHAR;
    loadedBlocks++;
    if ( inputExhausted) {
      // Nothing more to read; release the file handle right away.
      closeReader();
    }
  }

  /**
   * Returns the next character of the input and advances the read position.
   * At the end of the input the EOF character (0x1A) is returned and the read
   * position is not advanced, so every further call returns the EOF character again.
   *
   * @return the next character, or the EOF character at the end of the input.
   * @throws LexemeReaderException if the current lexeme is too long to be kept in the
   *         buffer pair or if an I/O error occurred while reading the file.
   */
  public char getNextChar() throws LexemeReaderException {
    long block = forwardPosition / BLOCK_SIZE;
    if ( block >= loadedBlocks) {
      // The read position ran past the last resident block: fetch the next one.
      // After a step backwards the block is still resident and must not be loaded again.
      loadNextBlock();
    }
    char result = buffers[(int) (block % 2)][(int) (forwardPosition % BLOCK_SIZE)];
    if ( result == EOF_CHAR) {
      // do not move over the end
      return EOF_CHAR;
    }
    forwardPosition++;
    return result;
  }

  /**
   * Moves the read position back to just behind the end of the last accepted lexeme
   * (the start of the input if nothing has been accepted yet).
   *
   * @throws LexemeReaderException declared for interface compatibility; not thrown here.
   */
  public void reset() throws LexemeReaderException {
    forwardPosition = lexemeBeginMarker;
  }

  /**
   * Accepts the lexeme read last by placing the marker {@link #lexemeBeginMarker}
   * at the current read position, i.e. exactly behind the characters read so far.
   *
   * @throws LexemeReaderException declared for interface compatibility; not thrown here.
   */
  public void accept() throws LexemeReaderException {
    lexemeBeginMarker = forwardPosition;
  }

  /**
   * Moves the read position back by the given number of characters, but never
   * further back than the end of the last accepted lexeme.
   *
   * @param steps number of characters to step back; must not be negative.
   * @throws LexemeReaderException if {@code steps} is negative.
   */
  public void stepBackward( int steps) throws LexemeReaderException {
    if ( steps < 0) {
      throw new LexemeReaderException( String.format("Cannot step backward by a negative amount (%d).", steps));
    }
    forwardPosition = Math.max( lexemeBeginMarker, forwardPosition - steps);
  }

  /**
   * Opens the input file again. The next call of {@link #getNextChar()}
   * then returns the first character of the source file.
   *
   * @throws LexemeReaderException if the file cannot be opened or read.
   */
  public void reopen() throws LexemeReaderException {
    open();
  }
}