// $ANTLR 3.4 Tokenizer.g 2012-08-31 09:41:11 package edu.isi.karma.cleaning; import org.antlr.runtime.CharStream; import org.antlr.runtime.EarlyExitException; import org.antlr.runtime.Lexer; import org.antlr.runtime.MismatchedSetException; import org.antlr.runtime.NoViableAltException; import org.antlr.runtime.RecognitionException; import org.antlr.runtime.RecognizerSharedState; @SuppressWarnings({"all", "warnings", "unchecked"}) public class Tokenizer extends Lexer { public static final int EOF=-1; public static final int BLANK=4; public static final int DIGIT=5; public static final int END=6; public static final int LETTER=7; public static final int LOWER=8; public static final int LWRD=9; public static final int NUMBER=10; public static final int START=11; public static final int SYBS=12; public static final int SYMBOL=13; public static final int UPPER=14; public static final int UWRD=15; // delegates // delegators public Lexer[] getDelegates() { return new Lexer[] {}; } public Tokenizer() {} public Tokenizer(CharStream input) { this(input, new RecognizerSharedState()); } public Tokenizer(CharStream input, RecognizerSharedState state) { super(input,state); } public String getGrammarFileName() { return "Tokenizer.g"; } // $ANTLR start "BLANK" public final void mBLANK() throws RecognitionException { try { int _type = BLANK; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:3:6: ( ( '\\t' | ' ' | '\\r' | '\\n' | '\\u000C' ) ) // Tokenizer.g: { if ( (input.LA(1) >= '\t' && input.LA(1) <= '\n')||(input.LA(1) >= '\f' && input.LA(1) <= '\r')||input.LA(1)==' ' ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "BLANK" // $ANTLR start "UWRD" public final void mUWRD() throws RecognitionException { try { int _type = UWRD; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:4:6: ( UPPER ) // Tokenizer.g: { if ( (input.LA(1) >= 'A' && input.LA(1) <= 'Z') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "UWRD" // $ANTLR start "LWRD" public final void mLWRD() throws RecognitionException { try { int _type = LWRD; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:5:6: ( ( LOWER )+ ) // Tokenizer.g:5:8: ( LOWER )+ { // Tokenizer.g:5:8: ( LOWER )+ int cnt1=0; loop1: do { int alt1=2; int LA1_0 = input.LA(1); if ( ((LA1_0 >= 'a' && LA1_0 <= 'z')) ) { alt1=1; } switch (alt1) { case 1 : // Tokenizer.g: { if ( (input.LA(1) >= 'a' && input.LA(1) <= 'z') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } break; default : if ( cnt1 >= 1 ) break loop1; EarlyExitException eee = new EarlyExitException(1, input); throw eee; } cnt1++; } while (true); } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "LWRD" // $ANTLR start "NUMBER" public final void mNUMBER() throws RecognitionException { try { int _type = NUMBER; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:6:7: ( ( DIGIT )+ ) // Tokenizer.g:6:9: ( DIGIT )+ { // Tokenizer.g:6:9: ( DIGIT )+ int cnt2=0; loop2: do { int alt2=2; int LA2_0 = input.LA(1); if ( ((LA2_0 >= '0' && LA2_0 <= '9')) ) { alt2=1; } switch (alt2) { case 1 : // Tokenizer.g: { if ( (input.LA(1) >= '0' && input.LA(1) <= '9') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } break; default : if ( cnt2 >= 1 ) break loop2; EarlyExitException eee = new EarlyExitException(2, input); throw eee; } cnt2++; } while (true); } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "NUMBER" // $ANTLR start "SYBS" public final void mSYBS() throws RecognitionException { try { int _type = SYBS; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:7:5: ( ( SYMBOL ) ) // Tokenizer.g: { if ( input.LA(1)=='<'||(input.LA(1) >= '!' && input.LA(1) <= '/')||(input.LA(1) >= ':' && input.LA(1) <= '@')||(input.LA(1) >= '[' && input.LA(1) <= '`')||(input.LA(1) >= '{' && input.LA(1) <= '~') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "SYBS" // $ANTLR start "START" public final void mSTART() throws RecognitionException { try { int _type = START; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:8:7: ( '<_START>' ) // Tokenizer.g:8:9: '<_START>' { match("<_START>"); } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "START" // $ANTLR start "END" public final void mEND() throws RecognitionException { try { int _type = END; int _channel = DEFAULT_TOKEN_CHANNEL; // Tokenizer.g:9:5: ( '<_END>' ) // Tokenizer.g:9:7: '<_END>' { match("<_END>"); } state.type = _type; state.channel = _channel; } finally { // do for sure before leaving } } // $ANTLR end "END" // $ANTLR start "SYMBOL" public final void mSYMBOL() throws RecognitionException { try { // Tokenizer.g:12:2: ( '!' | '#' .. '/' | ':' .. '@' | '[' .. '`' | '{' .. '~' ) // Tokenizer.g: { if ( input.LA(1)=='!'||(input.LA(1) >= '#' && input.LA(1) <= '/')||(input.LA(1) >= ':' && input.LA(1) <= '@')||(input.LA(1) >= '[' && input.LA(1) <= '`')||(input.LA(1) >= '{' && input.LA(1) <= '~') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } } finally { // do for sure before leaving } } // $ANTLR end "SYMBOL" // $ANTLR start "LETTER" public final void mLETTER() throws RecognitionException { try { // Tokenizer.g:14:2: ( LOWER | UPPER ) // Tokenizer.g: { if ( (input.LA(1) >= 'A' && input.LA(1) <= 'Z')||(input.LA(1) >= 'a' && input.LA(1) <= 'z') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } } finally { // do for sure before leaving } } // $ANTLR end "LETTER" // $ANTLR start "LOWER" public final void mLOWER() throws RecognitionException { try { // Tokenizer.g:16:2: ( 'a' .. 'z' ) // Tokenizer.g: { if ( (input.LA(1) >= 'a' && input.LA(1) <= 'z') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } } finally { // do for sure before leaving } } // $ANTLR end "LOWER" // $ANTLR start "UPPER" public final void mUPPER() throws RecognitionException { try { // Tokenizer.g:18:2: ( 'A' .. 'Z' ) // Tokenizer.g: { if ( (input.LA(1) >= 'A' && input.LA(1) <= 'Z') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } } finally { // do for sure before leaving } } // $ANTLR end "UPPER" // $ANTLR start "DIGIT" public final void mDIGIT() throws RecognitionException { try { // Tokenizer.g:19:16: ( '0' .. '9' ) // Tokenizer.g: { if ( (input.LA(1) >= '0' && input.LA(1) <= '9') ) { input.consume(); } else { MismatchedSetException mse = new MismatchedSetException(null,input); recover(mse); throw mse; } } } finally { // do for sure before leaving } } // $ANTLR end "DIGIT" public void mTokens() throws RecognitionException { // Tokenizer.g:1:8: ( BLANK | UWRD | LWRD | NUMBER | SYBS | START | END ) int alt3=7; switch ( input.LA(1) ) { case '\t': case '\n': case '\f': case '\r': case ' ': { alt3=1; } break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': { alt3=2; } break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': { alt3=3; } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { alt3=4; } break; case '<': { int LA3_5 = input.LA(2); if ( (LA3_5=='_') ) { int LA3_7 = input.LA(3); if ( (LA3_7=='S') ) { alt3=6; } else if ( (LA3_7=='E') ) { alt3=7; } else { NoViableAltException nvae = new NoViableAltException("", 3, 7, input); throw nvae; } } else { alt3=5; } } break; case '!': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': case '-': case '.': case '/': case ':': case ';': case '=': case '>': case '?': case '@': case '[': case '\\': case ']': case '^': case '_': case '`': case '{': case '|': case '}': case '~': { alt3=5; } break; default: NoViableAltException nvae = new NoViableAltException("", 3, 0, input); throw nvae; } switch (alt3) { case 1 : // Tokenizer.g:1:10: BLANK { mBLANK(); } break; case 2 : // Tokenizer.g:1:16: UWRD { mUWRD(); } break; case 3 : // Tokenizer.g:1:21: LWRD { mLWRD(); } break; case 4 : // Tokenizer.g:1:26: NUMBER { mNUMBER(); } break; case 5 : // Tokenizer.g:1:33: SYBS { mSYBS(); } break; case 6 : // Tokenizer.g:1:38: START { mSTART(); } break; case 7 : // Tokenizer.g:1:44: END { mEND(); } break; } } }