/* * Copyright (C) 2008 Universidade Federal de Campina Grande * * This file is part of OurGrid. * * OurGrid is free software: you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.ourgrid.common.specification.token; import java.io.IOException; import org.ourgrid.common.specification.CodesTable; import org.ourgrid.common.specification.CompilerMessages; import org.ourgrid.common.specification.SpecialCharException; import org.ourgrid.common.specification.TokenDelimiter; import org.ourgrid.common.specification.io.CharReader; /** * It is a Token object that is recognized as a string at the CodesTable. * * @see org.ourgrid.common.specification.CodesTable */ public class StringToken extends Token { private CodesTable codesTable = CodesTable.getInstance(); /** Store all the special chars that are readed only after a "\" symbol. */ private TokenDelimiter specialChar; public StringToken() { specialChar = new TokenDelimiter(); specialChar.addDelimiter( '\"' ); specialChar.addDelimiter( '\\' ); } /** * Check if the character passed as paramether is part of a string. This * method of reading string considers many symbols as end-of-word. Check * symbol at method "this.isEndOfWord()" * * @param firstPart - Is the String that probably begins the StringToken * symbol * @param reader - Is the reader that controls the character reading process * from source. * @return A Token object if a string was recognized beggining with * "firstPart"; or "null" if it was not. * @see org.ourgrid.common.specification.CodesTable */ public Token readStringToken( String firstPart, CharReader reader ) throws IOException { StringBuffer buffer = new StringBuffer(); buffer.append( firstPart ); char next = reader.readChar(); while ( !isEndOfWord( next, reader ) && next != CharReader.EOF_CHAR ) { buffer.append( next ); next = reader.readChar(); } String tokenSymbol = buffer.toString(); Token theToken = getToken( tokenSymbol, reader ); return this.checkOtherTypes( theToken ); } /** * Generalizes the token reading process when considering the token * delimiters desired. * * @param reader the reader able to get the characters from source. * @param delimiters the delimiter's set * @return A token where the token's symbol is the "string read" * @throws IOException * @throws SpecialCharException If a back slash is found a no special char * was recognized. */ public Token readString( CharReader reader, TokenDelimiter delimiters ) throws SpecialCharException, IOException { StringBuffer buffer = new StringBuffer(); char next = reader.readNonBlankChar(); if ( next == '\"' ) { delimiters = new TokenDelimiter(); delimiters.addDelimiter( '\"' ); next = reader.readNonBlankChar(); } // The End-Of-File char is always a delimiter delimiters.addDelimiter( CharReader.EOF_CHAR ); while ( !delimiters.contains( next ) ) { if ( next == '\\' ) { next = this.checkSpecialChar( reader ); } buffer.append( next ); next = reader.readChar(); } // Will unread a delimiter if it is equals to "\n" and only when it was // used as delimiter // It means that a \n will not be unread if it appears alone. if ( next == '\n' && buffer.length() != 0 ) reader.unreadChar( next ); this.setCode( codesTable.getCode( CodesTable.STRING ) ); this.setLine( reader.getActualLine() ); this.setSymbol( buffer.toString().trim() ); return this; } /** * Check if the next character readed form reader is a valid special one. It * is used after read a '\' (back slash) symbol. * * @param reader the reader from source. * @return the character to be inserted as next. * @throws IOException If an error occurs at reader. * @throws SpecialCharException */ private char checkSpecialChar( CharReader reader ) throws IOException, SpecialCharException { char nextNext = reader.readChar(); if ( specialChar.contains( nextNext ) ) { // It is the treatment for the actual special character " and \, but // it probably // have to change to another special characters. return nextNext; } throw new SpecialCharException( CompilerMessages.BAD_SPECIAL_CHAR( reader.getActualLine() ) ); } /** * @param theToken token read as StringToken * @return The same StringToken if it is not other "special type", the * special type operator or a special terminal symbol. */ private Token checkOtherTypes( Token theToken ) { String symbol = theToken.getSymbol(); int line = theToken.getLine(); int code = codesTable.getCode( symbol ); if ( code != 0 ) { int type = codesTable.getType( symbol ); if ( type == CodesTable.OPERATOR ) { theToken = new Operator(); } else if ( type == CodesTable.RESERVED_WORD ) { theToken = new Token(); } theToken.setSymbol( symbol ); theToken.setLine( line ); theToken.setCode( code ); } return theToken; } /** * Builds this object to be returned using the informations passed as * paramether. * * @param tokenSymbol the peace of source read * @param reader the reader able to read source * @return Token The token that could be build using the tokenSymbol */ private Token getToken( String tokenSymbol, CharReader reader ) { CodesTable codesTable = CodesTable.getInstance(); int code = codesTable.getCode( CodesTable.STRING ); // Setting the Token object this.setSymbol( tokenSymbol ); this.setCode( code ); this.setLine( reader.getActualLine() ); return this; } /** * Tells if the next chararacter is a symbol that defines the end of a * string for the commom lexical compiler. OBS.: If the character is found * as a End-Of-Word then it will be unread! * * @param theChar The character to be analyzed. * @param reader The reader used to read the source. * @return Will return true if the next char is any of this symbols: blanck * space, tab, end of line, '\t', ';', '{', '}', '(', ')', ':'; that * are symbols that represents a end of a string. * @throws IOException Thrown if could not unread a char from the "reader". */ public static boolean isEndOfWord( char theChar, CharReader reader ) throws IOException { if ( theChar == ' ' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '\t' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '\n' ) { reader.unreadChar( theChar ); return true; } if ( theChar == ';' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '{' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '}' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '(' ) { reader.unreadChar( theChar ); return true; } if ( theChar == ')' ) { reader.unreadChar( theChar ); return true; } if ( theChar == ':' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '=' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '>' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '<' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '&' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '|' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '!' ) { reader.unreadChar( theChar ); return true; } if ( theChar == '\"' ) { reader.unreadChar( theChar ); return true; } return false; } }