StringToken.java example

/*
 * Copyright (C) 2008 Universidade Federal de Campina Grande
 *  
 * This file is part of OurGrid. 
 *
 * OurGrid is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free 
 * Software Foundation, either version 3 of the License, or (at your option) 
 * any later version. 
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT 
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details. 
 * 
 * You should have received a copy of the GNU Lesser General Public License 
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * 
 */
package org.ourgrid.common.specification.token;

import java.io.IOException;

import org.ourgrid.common.specification.CodesTable;
import org.ourgrid.common.specification.CompilerMessages;
import org.ourgrid.common.specification.SpecialCharException;
import org.ourgrid.common.specification.TokenDelimiter;
import org.ourgrid.common.specification.io.CharReader;

/**
 * It is a Token object that is recognized as a string at the CodesTable.
 * 
 * @see org.ourgrid.common.specification.CodesTable
 */
public class StringToken extends Token {

	private CodesTable codesTable = CodesTable.getInstance();

	/** Store all the special chars that are readed only after a "\" symbol. */
	private TokenDelimiter specialChar;


	public StringToken() {

		specialChar = new TokenDelimiter();
		specialChar.addDelimiter( '\"' );
		specialChar.addDelimiter( '\\' );
	}


	/**
	 * Check if the character passed as paramether is part of a string. This
	 * method of reading string considers many symbols as end-of-word. Check
	 * symbol at method "this.isEndOfWord()"
	 * 
	 * @param firstPart - Is the String that probably begins the StringToken
	 *        symbol
	 * @param reader - Is the reader that controls the character reading process
	 *        from source.
	 * @return A Token object if a string was recognized beggining with
	 *         "firstPart"; or "null" if it was not.
	 * @see org.ourgrid.common.specification.CodesTable
	 */
	public Token readStringToken( String firstPart, CharReader reader ) throws IOException {

		StringBuffer buffer = new StringBuffer();
		buffer.append( firstPart );
		char next = reader.readChar();
		while ( !isEndOfWord( next, reader ) && next != CharReader.EOF_CHAR ) {
			buffer.append( next );
			next = reader.readChar();
		}

		String tokenSymbol = buffer.toString();
		Token theToken = getToken( tokenSymbol, reader );

		return this.checkOtherTypes( theToken );

	}


	/**
	 * Generalizes the token reading process when considering the token
	 * delimiters desired.
	 * 
	 * @param reader the reader able to get the characters from source.
	 * @param delimiters the delimiter's set
	 * @return A token where the token's symbol is the "string read"
	 * @throws IOException
	 * @throws SpecialCharException If a back slash is found a no special char
	 *         was recognized.
	 */
	public Token readString( CharReader reader, TokenDelimiter delimiters ) throws SpecialCharException, IOException {

		StringBuffer buffer = new StringBuffer();
		char next = reader.readNonBlankChar();

		if ( next == '\"' ) {
			delimiters = new TokenDelimiter();
			delimiters.addDelimiter( '\"' );
			next = reader.readNonBlankChar();
		}

		// The End-Of-File char is always a delimiter
		delimiters.addDelimiter( CharReader.EOF_CHAR );
		while ( !delimiters.contains( next ) ) {
			if ( next == '\\' ) {
				next = this.checkSpecialChar( reader );
			}
			buffer.append( next );
			next = reader.readChar();
		}
		// Will unread a delimiter if it is equals to "\n" and only when it was
		// used as delimiter
		// It means that a \n will not be unread if it appears alone.
		if ( next == '\n' && buffer.length() != 0 )
			reader.unreadChar( next );
		this.setCode( codesTable.getCode( CodesTable.STRING ) );
		this.setLine( reader.getActualLine() );
		this.setSymbol( buffer.toString().trim() );
		return this;
	}


	/**
	 * Check if the next character readed form reader is a valid special one. It
	 * is used after read a '\' (back slash) symbol.
	 * 
	 * @param reader the reader from source.
	 * @return the character to be inserted as next.
	 * @throws IOException If an error occurs at reader.
	 * @throws SpecialCharException
	 */
	private char checkSpecialChar( CharReader reader ) throws IOException, SpecialCharException {

		char nextNext = reader.readChar();
		if ( specialChar.contains( nextNext ) ) {
			// It is the treatment for the actual special character " and \, but
			// it probably
			// have to change to another special characters.
			return nextNext;
		}
		throw new SpecialCharException( CompilerMessages.BAD_SPECIAL_CHAR( reader.getActualLine() ) );
	}


	/**
	 * @param theToken token read as StringToken
	 * @return The same StringToken if it is not other "special type", the
	 *         special type operator or a special terminal symbol.
	 */
	private Token checkOtherTypes( Token theToken ) {

		String symbol = theToken.getSymbol();
		int line = theToken.getLine();
		int code = codesTable.getCode( symbol );

		if ( code != 0 ) {
			int type = codesTable.getType( symbol );
			if ( type == CodesTable.OPERATOR ) {
				theToken = new Operator();
			} else if ( type == CodesTable.RESERVED_WORD ) {
				theToken = new Token();
			}
			theToken.setSymbol( symbol );
			theToken.setLine( line );
			theToken.setCode( code );
		}
		return theToken;
	}


	/**
	 * Builds this object to be returned using the informations passed as
	 * paramether.
	 * 
	 * @param tokenSymbol the peace of source read
	 * @param reader the reader able to read source
	 * @return Token The token that could be build using the tokenSymbol
	 */
	private Token getToken( String tokenSymbol, CharReader reader ) {

		CodesTable codesTable = CodesTable.getInstance();
		int code = codesTable.getCode( CodesTable.STRING );

		// Setting the Token object
		this.setSymbol( tokenSymbol );
		this.setCode( code );
		this.setLine( reader.getActualLine() );

		return this;
	}


	/**
	 * Tells if the next chararacter is a symbol that defines the end of a
	 * string for the commom lexical compiler. OBS.: If the character is found
	 * as a End-Of-Word then it will be unread!
	 * 
	 * @param theChar The character to be analyzed.
	 * @param reader The reader used to read the source.
	 * @return Will return true if the next char is any of this symbols: blanck
	 *         space, tab, end of line, '\t', ';', '{', '}', '(', ')', ':'; that
	 *         are symbols that represents a end of a string.
	 * @throws IOException Thrown if could not unread a char from the "reader".
	 */
	public static boolean isEndOfWord( char theChar, CharReader reader ) throws IOException {

		if ( theChar == ' ' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '\t' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '\n' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == ';' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '{' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '}' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '(' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == ')' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == ':' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '=' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '>' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '<' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '&' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '|' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '!' ) {
			reader.unreadChar( theChar );
			return true;
		}
		if ( theChar == '\"' ) {
			reader.unreadChar( theChar );
			return true;
		}
		return false;
	}

}