/***************************************************************************
 *   Copyright (C) by Fabrizio Montesi                                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU Library General Public License as       *
 *   published by the Free Software Foundation; either version 2 of the    *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU Library General Public     *
 *   License along with this program; if not, write to the                 *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *                                                                         *
 *   For details about the authors of this software, see the AUTHORS file. *
 ***************************************************************************/

package jolie.net.http;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import jolie.lang.parse.Scanner;
import jolie.lang.parse.Scanner.Token;
import jolie.lang.parse.Scanner.TokenType;

/**
 * Character-by-character scanner over an {@link InputStream}.
 *
 * The scanner always holds one "current" character (see
 * {@link #currentCharacter()}); the constructor reads the first one.
 * Every byte read from the stream is turned into a {@code char} by a plain
 * cast, so no charset decoding takes place. At end of stream the underlying
 * read returns -1 and the current character becomes {@code '\uffff'}.
 */
public class HttpScanner
{
	private final InputStream stream;
	private int state;		// current state of the getToken() automaton
	private int currInt;	// last value returned by stream.read(), -1 at end of stream
	private char ch;		// currInt cast to char
	private static final int OVERFLOW_NET = 8192;

	/**
	 * Creates a scanner over the given stream and reads its first character.
	 *
	 * @param stream the stream to scan
	 * @param source not used by this implementation
	 * @throws IOException if reading the first character fails
	 */
	public HttpScanner( InputStream stream, URI source )
		throws IOException
	{
		this.stream = stream;
		readChar();
	}

	/**
	 * Reads characters up to the next new line character or the end of the
	 * stream, whichever comes first.
	 *
	 * The current character is discarded: a fresh character is read before
	 * accumulation starts. The terminating new line character is not part
	 * of the result and is left as the current character.
	 *
	 * @return the characters read, possibly empty
	 * @throws IOException if reading from the stream fails
	 */
	public String readLine()
		throws IOException
	{
		StringBuilder buffer = new StringBuilder();
		readChar();
		// Stop at end of stream too: without this check a closed stream
		// would make the loop append '\uffff' forever.
		while( currInt != -1 && !Scanner.isNewLineChar( ch ) ) {
			buffer.append( ch );
			readChar();
		}
		return buffer.toString();
	}

	/*
	 * TODO: remove code duplication from jolie.lang.Scanner
	 */

	/**
	 * Equivalent to {@code readWord( true )}.
	 *
	 * @return the word read
	 * @throws IOException if reading from the stream fails
	 */
	public String readWord()
		throws IOException
	{
		return readWord( true );
	}

	/**
	 * Reads characters up to the next separator (as defined by
	 * {@code Scanner.isSeparator}) or the end of the stream.
	 *
	 * The first character considered is always appended, even if it is
	 * itself a separator. The terminating separator is not part of the
	 * result and is left as the current character.
	 *
	 * @param readChar if {@code true} the current character is discarded and
	 *                 a fresh one is read before starting; if {@code false}
	 *                 the word starts with the current character
	 * @return the word read; empty if the stream is already at its end
	 * @throws IOException if reading from the stream fails
	 */
	public String readWord( boolean readChar )
		throws IOException
	{
		StringBuilder buffer = new StringBuilder();
		if ( readChar ) {
			readChar();
		}
		if ( currInt == -1 ) {
			// Nothing left to read: do not append the '\uffff' sentinel.
			return buffer.toString();
		}
		do {
			buffer.append( ch );
			readChar();
			// The end-of-stream check prevents an endless loop when the
			// stream ends before a separator is found.
		} while( currInt != -1 && !Scanner.isSeparator( ch ) );
		return buffer.toString();
	}

	/**
	 * Advances until the current character is not a separator.
	 *
	 * @throws IOException if reading from the stream fails
	 */
	public void eatSeparators()
		throws IOException
	{
		while( Scanner.isSeparator( ch ) ) {
			readChar();
		}
	}

	/**
	 * Advances while the current character is a separator and the stream
	 * reports that more bytes are available.
	 *
	 * @throws IOException if reading from the stream fails
	 */
	public void eatSeparatorsUntilEOF()
		throws IOException
	{
		while( Scanner.isSeparator( ch ) && stream.available() > 0 ) {
			readChar();
		}
	}

	/**
	 * @return the current character ({@code '\uffff'} once the end of the
	 *         stream has been reached)
	 */
	public char currentCharacter()
	{
		return ch;
	}

	/**
	 * @return the stream this scanner reads from
	 */
	public InputStream inputStream()
	{
		return stream;
	}

	/**
	 * Reads the next byte from the stream and makes it the current character.
	 *
	 * @throws IOException if reading from the stream fails
	 */
	public final void readChar()
		throws IOException
	{
		currInt = stream.read();
		ch = (char)currInt;
	}

	/**
	 * Scans the next token starting at the current character.
	 *
	 * Leading separators are skipped first. If the skipped run (including
	 * the character that ends it) contains two line feed characters, an
	 * EOF token is returned
	 * (NOTE(review): presumably this detects the blank line that closes an
	 * HTTP header section - confirm against callers). An EOF token is also
	 * returned when the end of the stream is reached while skipping.
	 *
	 * @return the token read; an ERROR token if the stream ends before a
	 *         token is completed
	 * @throws IOException if reading from the stream fails, if a string
	 *         literal contains an unsupported escape sequence, or if the
	 *         token being accumulated grows beyond {@code OVERFLOW_NET}
	 *         characters
	 */
	public Token getToken()
		throws IOException
	{
		state = 1;

		// Skip separators, remembering what was skipped so that two line
		// feeds in the run can be detected.
		StringBuilder builder = new StringBuilder();
		builder.append( ch );
		while ( currInt != -1 && Scanner.isSeparator( ch ) ) {
			readChar();
			builder.append( ch );
			String skipped = builder.toString();
			int firstNewLine = skipped.indexOf( '\n' );
			// True only when a second '\n' exists after the first one.
			if ( firstNewLine < skipped.indexOf( '\n', firstNewLine + 1 ) ) {
				return new Token( TokenType.EOF );
			}
		}

		if ( currInt == -1 ) {
			return new Token( TokenType.EOF );
		}

		// When set, the current character has already been consumed by the
		// state handler and must not be appended/advanced at loop end.
		boolean stopOneChar = false;
		Token retval = null;
		builder = new StringBuilder();

		while ( currInt != -1 && retval == null ) {
			switch( state ) {
				/* When considering multi-characters tokens (states > 1),
				 * remember to read another character in case of a
				 * specific character (==) check.
				 */

				case 1:	// First character
					if ( Character.isLetter( ch ) )
						state = 2;
					else if ( Character.isDigit( ch ) )
						state = 3;
					else if ( ch == '"' )
						state = 4;
					else if ( ch == '+' )
						state = 5;
					else if ( ch == '=' )
						state = 6;
					else if ( ch == '|' )
						state = 7;
					else if ( ch == '&' )
						state = 8;
					else if ( ch == '<' )
						state = 9;
					else if ( ch == '>' )
						state = 10;
					else if ( ch == '!' )
						state = 11;
					else if ( ch == '/' )
						state = 12;
					else if ( ch == '-' )
						state = 14;
					else {	// ONE CHARACTER TOKEN
						if ( ch == '(' )
							retval = new Token( TokenType.LPAREN );
						else if ( ch == ')' )
							retval = new Token( TokenType.RPAREN );
						else if ( ch == '[' )
							retval = new Token( TokenType.LSQUARE );
						else if ( ch == ']' )
							retval = new Token( TokenType.RSQUARE );
						else if ( ch == '{' )
							retval = new Token( TokenType.LCURLY );
						else if ( ch == '}' )
							retval = new Token( TokenType.RCURLY );
						else if ( ch == '*' )
							retval = new Token( TokenType.ASTERISK );
						else if ( ch == '@' )
							retval = new Token( TokenType.AT );
						else if ( ch == ':' )
							retval = new Token( TokenType.COLON );
						else if ( ch == ',' )
							retval = new Token( TokenType.COMMA );
						else if ( ch == ';' )
							retval = new Token( TokenType.SEQUENCE );
						else if ( ch == '.' )
							retval = new Token( TokenType.DOT );
						else if ( ch == '/' )
							retval = new Token( TokenType.DIVIDE );

						readChar();
					}
					break;
				case 2:	// ID
					if ( !Character.isLetterOrDigit( ch ) && ch != '_' && ch != '-' && ch != '+' ) {
						retval = new Token( TokenType.ID, builder.toString() );
					}
					break;
				case 3:	// INT
					if ( !Character.isDigit( ch ) )
						retval = new Token( TokenType.INT, builder.toString() );
					break;
				case 4:	// STRING
					if ( ch == '"' ) {
						// substring( 1 ) drops the opening quote stored in the builder
						retval = new Token( TokenType.STRING, builder.substring( 1 ) );
						readChar();
					} else if ( ch == '\\' ) { // Parse special characters
						readChar();
						if ( ch == '\\' )
							builder.append( '\\' );
						else if ( ch == 'n' )
							builder.append( '\n' );
						else if ( ch == 't' )
							builder.append( '\t' );
						else if ( ch == '"' )
							builder.append( '"' );
						else
							throw new IOException( "malformed string: bad \\ usage" );

						stopOneChar = true;
						readChar();
					}
					break;
				case 5:	// PLUS OR CHOICE
					if ( ch == '+' ) {
						retval = new Token( TokenType.INCREMENT );
						readChar();
					} else
						retval = new Token( TokenType.PLUS );
					break;
				case 6:	// ASSIGN OR EQUAL
					if ( ch == '=' ) {
						retval = new Token( TokenType.EQUAL );
						readChar();
					} else
						retval = new Token( TokenType.ASSIGN );
					break;
				case 7:	// PARALLEL OR LOGICAL OR
					if ( ch == '|' ) {
						retval = new Token( TokenType.OR );
						readChar();
					} else
						retval = new Token( TokenType.PARALLEL );
					break;
				case 8:	// LOGICAL AND
					if ( ch == '&' ) {
						retval = new Token( TokenType.AND );
						readChar();
					}
					break;
				case 9:	// LANGLE OR MINOR_OR_EQUAL
					if ( ch == '=' ) {
						retval = new Token( TokenType.MINOR_OR_EQUAL );
						readChar();
					} else
						retval = new Token( TokenType.LANGLE );
					break;
				case 10:	// RANGLE OR MAJOR_OR_EQUAL
					if ( ch == '=' ) {
						retval = new Token( TokenType.MAJOR_OR_EQUAL );
						readChar();
					} else
						retval = new Token( TokenType.RANGLE );
					break;
				case 11:	// NOT OR NOT_EQUAL
					if ( ch == '=' ) {
						retval = new Token( TokenType.NOT_EQUAL );
						readChar();
					} else
						retval = new Token( TokenType.NOT );
					break;
				case 12:	// DIVIDE OR BEGIN_COMMENT OR LINE_COMMENT
					/*if ( ch == '*' ) {
						state = 13;
						readChar();
					} else if ( ch == '/' ) {
						state = 15;
						readChar();
					} else*/
						retval = new Token( TokenType.DIVIDE );
					break;
				case 13:	// WAITING FOR END_COMMENT
					if ( ch == '*' ) {
						readChar();
						stopOneChar = true;
						if ( ch == '/' ) {
							readChar();
							retval = getToken();
						}
					}
					break;
				case 14:	// MINUS OR (negative) INT
					if ( Character.isDigit( ch ) )
						state = 3;
					else
						retval = new Token( TokenType.MINUS );
					break;
				case 15:	// LINE_COMMENT: waiting for end of line
					if ( ch == '\n' ) {
						readChar();
						retval = getToken();
					}
					break;
				default:
					retval = new Token( TokenType.ERROR );
					break;
			}

			if ( retval == null ) {
				if ( stopOneChar ) {
					stopOneChar = false;
				} else {
					// Guard against unbounded tokens coming from the network.
					if ( builder.length() > OVERFLOW_NET ) {
						throw new IOException(
							"Token length exceeds maximum allowed limit (" + OVERFLOW_NET + " bytes). First 10 characters: "
							+ builder.toString().substring( 0, 10 )
							+ " Last 10 characters: "
							+ builder.toString().substring( builder.length() - 10, builder.length() )
						);
					}
					builder.append( ch );
					readChar();
				}
			}
		}

		// The stream ended before the automaton could complete a token.
		if ( retval == null ) {
			retval = new Token( TokenType.ERROR );
		}

		return retval;
	}
}