/**
 *
 */
package edu.berkeley.nlp.io;

/* The following code was generated by JFlex 1.3.5 on 12/2/02 9:02 PM */

import java.util.*;
import java.io.*;

/**
 * Known limitation: doesn't handle "can not" to "cannot".
 */

/**
 * This class is a scanner generated by
 * <a href="http://www.jflex.de/">JFlex</a> 1.3.5
 * on 12/2/02 9:02 PM from the specification file
 * <tt>file:/nlp/u1/jsmarr/cs276a/src/googleling/PTB2TextLexer.flex</tt>
 *
 * <p>Each call to {@link #next()} returns the replacement text for the next
 * matched piece of input, or <code>null</code> once the input is exhausted.
 */
class PTB2TextLexer {

  /** This character denotes the end of file */
  final public static int YYEOF = -1;

  /** initial size of the lookahead buffer */
  final private static int YY_BUFFERSIZE = 16384;

  /** lexical states */
  final public static int YYINITIAL = 0;

  /**
   * Run-length encoded (count, class) pairs that translate characters
   * to character classes
   */
  final private static String yycmap_packed =
    "\12\0\1\6\25\0\1\1\1\5\2\0\1\25\1\24\1\0\1\2"+
    "\1\17\1\20\2\0\1\5\1\13\1\4\13\0\1\5\1\5\3\0"+
    "\1\5\2\0\1\16\1\21\10\0\1\14\1\0\1\11\3\0\1\15"+
    "\1\0\1\12\6\0\1\22\1\0\1\23\2\0\1\3\15\0\1\7"+
    "\5\0\1\10\uff8b\0";

  /**
   * Translates characters to character classes
   */
  final private static char [] yycmap = yy_unpack_cmap(yycmap_packed);

  /**
   * Translates a state to a row index in the transition table
   */
  final private static int yy_rowMap [] = {
        0,    22,    44,    66,    88,   110,   132,   154,
      176,   198,   220,    88,   242,   264,   286,    88,
       88,    88,    88,   308,   330,    88,    88,    88,
       88,   352,   374,   396,   418,   440,   462,    88,
       88,   484,   506,   528,   550,   572,   594
  };

  /**
   * The packed transition table of the DFA (part 0)
   */
  final private static String yy_packed0 =
    "\1\2\1\3\1\2\1\4\2\2\1\5\4\2\1\6"+
    "\3\2\1\7\2\2\1\10\2\2\1\11\1\2\1\0"+
    "\4\2\1\0\17\2\2\0\1\12\1\0\1\13\1\14"+
    "\1\0\1\15\1\0\1\16\1\0\1\17\4\0\1\20"+
    "\2\0\1\21\1\22\1\0\1\2\1\23\1\2\1\24"+
    "\2\2\1\0\17\2\26\0\1\2\1\0\4\2\1\0"+
    "\5\2\1\25\12\2\1\26\4\2\1\0\20\2\1\27"+
    "\4\2\1\0\20\2\1\30\4\2\1\0\17\2\1\14"+
    "\1\0\1\31\3\14\1\0\17\14\4\0\1\32\23\0"+
    "\1\33\25\0\1\34\40\0\1\35\10\0\1\2\1\31"+
    "\4\2\1\0\20\2\1\0\4\2\1\0\6\2\1\36"+
    "\3\2\1\37\4\2\4\0\1\14\31\0\1\40\27\0"+
    "\1\41\30\0\1\42\3\0\1\43\4\0\1\2\1\0"+
    "\4\2\1\0\7\2\1\44\10\2\1\0\4\2\1\0"+
    "\7\2\1\45\7\2\16\0\1\46\25\0\1\47\7\0"+
    "\1\2\1\0\4\2\1\0\4\2\1\7\13\2\1\0"+
    "\4\2\1\0\4\2\1\10\12\2\13\0\1\20\25\0"+
    "\1\21\12\0";

  /**
   * The transition table of the DFA
   */
  final private static int yytrans [] = yy_unpack();

  /* error codes */
  final private static int YY_UNKNOWN_ERROR = 0;
  final private static int YY_ILLEGAL_STATE = 1;
  final private static int YY_NO_MATCH = 2;
  final private static int YY_PUSHBACK_2BIG = 3;

  /* error messages for the codes above */
  final private static String YY_ERROR_MSG[] = {
    "Unkown internal scanner error",
    "Internal error: unknown state",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>.
   * As used by {@link #next()}: bit 1 marks an accepting state, bit 8 marks a
   * state at which scanning stops immediately.
   */
  private final static byte YY_ATTRIBUTE[] = {
     0,  1,  1,  1,  9,  1,  1,  1,  1,  0,  1,  9,  0,  0,  0,  9,
     9,  9,  9,  1,  1,  9,  9,  9,  9,  0,  0,  0,  0,  1,  1,  9,
     9,  0,  0,  1,  1,  0,  0
  };

  /** the input device */
  private java.io.Reader yy_reader;

  /** the current state of the DFA */
  private int yy_state;

  /** the current lexical state */
  private int yy_lexical_state = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char yy_buffer[] = new char[YY_BUFFERSIZE];

  /** the textposition at the last accepting state */
  private int yy_markedPos;

  /** the textposition at the last state to be included in yytext */
  private int yy_pushbackPos;

  /** the current text position in the buffer */
  private int yy_currentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int yy_startRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int yy_endRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /**
   * yy_atBOL == true <=> the scanner is currently at the beginning of a line
   */
  private boolean yy_atBOL = true;

  /** yy_atEOF == true <=> the scanner is at the EOF */
  private boolean yy_atEOF;

  /* user code: */
  /* Rules that were disabled in the specification:
  "'T WAS"                { return("'TWAS"); }
  "'T was"                { return("'Twas"); }
  "'t was"                { return("'twas"); }
  "'T IS"                 { return("'TIS"); }
  "'T is"                 { return("'Tis"); }
  "'t is"                 { return("'tis"); }
  */

  /**
   * Creates a new scanner
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  PTB2TextLexer(java.io.Reader in) {
    this.yy_reader = in;
  }

  /**
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   * The stream is decoded with the platform default charset.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  PTB2TextLexer(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }

  /**
   * Unpacks the split, compressed DFA transition table.
   *
   * @return the unpacked transition table
   */
  private static int [] yy_unpack() {
    int [] trans = new int[616];
    int offset = 0;
    offset = yy_unpack(yy_packed0, offset, trans);
    return trans;
  }

  /**
   * Unpacks the compressed DFA transition table.
   *
   * @param packed   the packed transition table, as (count, value+1) pairs
   * @param offset   index in <code>trans</code> at which to start writing
   * @param trans    the array receiving the unpacked entries
   * @return         the index one past the last entry written
   */
  private static int yy_unpack(String packed, int offset, int [] trans) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      value--;  /* values are stored shifted by one so that -1 means "no transition" */
      do trans[j++] = value; while (--count > 0);
    }
    return j;
  }

  /**
   * Unpacks the compressed character translation table.
   *
   * @param packed   the packed character translation table
   * @return         the unpacked character translation table
   */
  private static char [] yy_unpack_cmap(String packed) {
    char [] map = new char[0x10000];
    int i = 0;  /* index in packed string  */
    int j = 0;  /* index in unpacked array */
    int l = packed.length();  /* derived from the data instead of a hard-coded length */
    while (i < l) {
      int  count = packed.charAt(i++);
      char value = packed.charAt(i++);
      do map[j++] = value; while (--count > 0);
    }
    return map;
  }

  /**
   * Refills the input buffer.
   *
   * @return      <code>false</code>, iff there was new input.
   *
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  private boolean yy_refill() throws java.io.IOException {

    /* first: make room (if you can) */
    if (yy_startRead > 0) {
      System.arraycopy(yy_buffer, yy_startRead,
                       yy_buffer, 0,
                       yy_endRead-yy_startRead);

      /* translate stored positions */
      yy_endRead-= yy_startRead;
      yy_currentPos-= yy_startRead;
      yy_markedPos-= yy_startRead;
      yy_pushbackPos-= yy_startRead;
      yy_startRead = 0;
    }

    /* is the buffer big enough? */
    if (yy_currentPos >= yy_buffer.length) {
      /* if not: blow it up */
      char newBuffer[] = new char[yy_currentPos*2];
      System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
      yy_buffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = yy_reader.read(yy_buffer, yy_endRead,
                                 yy_buffer.length-yy_endRead);

    if (numRead > 0) {
      yy_endRead+= numRead;
      return false;
    }

    if (numRead == 0) {
      /* A reader may hand back zero characters without being at EOF.
         Reporting that as "new input" would make the caller consume a
         stale buffer slot, so force progress with a blocking
         single-character read instead. */
      int c = yy_reader.read();
      if (c == -1) {
        return true;
      }
      yy_buffer[yy_endRead++] = (char) c;
      return false;
    }

    /* numRead < 0: end of stream */
    return true;
  }

  /**
   * Closes the input stream.
   */
  final public void yyclose() throws java.io.IOException {
    yy_atEOF = true;            /* indicate end of file */
    yy_endRead = yy_startRead;  /* invalidate buffer    */

    if (yy_reader != null)
      yy_reader.close();
  }

  /**
   * Closes the current stream, and resets the
   * scanner to read from a new input stream.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>YY_INITIAL</tt>.
   *
   * @param reader   the new input stream
   */
  final public void yyreset(java.io.Reader reader) throws java.io.IOException {
    yyclose();
    yy_reader = reader;
    yy_atBOL  = true;
    yy_atEOF  = false;
    yy_endRead = yy_startRead = 0;
    yy_currentPos = yy_markedPos = yy_pushbackPos = 0;
    yyline = yychar = yycolumn = 0;
    yy_lexical_state = YYINITIAL;
  }

  /**
   * Returns the current lexical state.
   */
  final public int yystate() {
    return yy_lexical_state;
  }

  /**
   * Enters a new lexical state
   *
   * @param newState the new lexical state
   */
  final public void yybegin(int newState) {
    yy_lexical_state = newState;
  }

  /**
   * Returns the text matched by the current regular expression.
   */
  final public String yytext() {
    return new String( yy_buffer, yy_startRead, yy_markedPos-yy_startRead );
  }

  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  final public char yycharat(int pos) {
    return yy_buffer[yy_startRead+pos];
  }

  /**
   * Returns the length of the matched text region.
   */
  final public int yylength() {
    return yy_markedPos-yy_startRead;
  }

  /**
   * Reports an error that occurred while scanning.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   * If this method is called, something is seriously wrong
   * (e.g. a JFlex bug producing a faulty scanner etc.).
   *
   * Usual syntax/scanner level error handling should be done
   * in error fallback rules.
   *
   * @param   errorCode  the code of the errormessage to display
   */
  private void yy_ScanError(int errorCode) {
    String message;
    try {
      message = YY_ERROR_MSG[errorCode];
    }
    catch (ArrayIndexOutOfBoundsException e) {
      message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
    }

    throw new Error(message);
  }

  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by the next call of the scanning method
   *
   * @param number  the number of characters to be read again.
   *                This number must not be greater than yylength()!
   */
  private void yypushback(int number) {
    if ( number > yylength() )
      yy_ScanError(YY_PUSHBACK_2BIG);

    yy_markedPos -= number;
  }

  /**
   * Resumes scanning until the next regular expression is matched,
   * the end of input is encountered or an I/O-Error occurs.
   *
   * @return      the next token, or <code>null</code> at end of input
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  public String next() throws java.io.IOException {
    int yy_input;
    int yy_action;

    // cached fields:
    int yy_currentPos_l;
    int yy_startRead_l;
    int yy_markedPos_l;
    int yy_endRead_l = yy_endRead;
    char [] yy_buffer_l = yy_buffer;
    char [] yycmap_l = yycmap;

    int [] yytrans_l = yytrans;
    int [] yy_rowMap_l = yy_rowMap;
    byte [] yy_attr_l = YY_ATTRIBUTE;

    while (true) {
      yy_markedPos_l = yy_markedPos;

      yy_action = -1;

      // every match starts where the previous one ended
      yy_startRead_l = yy_currentPos_l = yy_currentPos =
                       yy_startRead = yy_markedPos_l;

      yy_state = yy_lexical_state;

      yy_forAction: {
        while (true) {

          if (yy_currentPos_l < yy_endRead_l)
            yy_input = yy_buffer_l[yy_currentPos_l++];
          else if (yy_atEOF) {
            yy_input = YYEOF;
            break yy_forAction;
          }
          else {
            // store back cached positions
            yy_currentPos  = yy_currentPos_l;
            yy_markedPos   = yy_markedPos_l;
            boolean eof = yy_refill();
            // get translated positions and possibly new buffer
            yy_currentPos_l  = yy_currentPos;
            yy_markedPos_l   = yy_markedPos;
            yy_buffer_l      = yy_buffer;
            yy_endRead_l     = yy_endRead;
            if (eof) {
              yy_input = YYEOF;
              break yy_forAction;
            }
            else {
              yy_input = yy_buffer_l[yy_currentPos_l++];
            }
          }
          int yy_next = yytrans_l[ yy_rowMap_l[yy_state] + yycmap_l[yy_input] ];
          if (yy_next == -1) break yy_forAction;
          yy_state = yy_next;

          int yy_attributes = yy_attr_l[yy_state];
          if ( (yy_attributes & 1) == 1 ) {
            // accepting state: remember it as the longest match so far
            yy_action = yy_state;
            yy_markedPos_l = yy_currentPos_l;
            if ( (yy_attributes & 8) == 8 ) break yy_forAction;
          }

        }
      }

      // store back cached position
      yy_markedPos = yy_markedPos_l;

      switch (yy_action) {

        case 32:
          { return("N'T"); }
        case 40: break;
        case 31:
          { return("n't"); }
        case 41: break;
        case 10:
        case 11:
          {
            // drop the first character of the match
            String matched = yytext();
            return(matched.substring(1, matched.length()));
          }
        case 42: break;
        case 1:
        case 3:
        case 5:
        case 6:
        case 7:
        case 8:
        case 19:
        case 20:
        case 29:
        case 30:
        case 35:
        case 36:
          { return(yytext()); }
        case 43: break;
        case 2:
          { return(yytext()); }
        case 44: break;
        case 4:
          { return(yytext()); }
        case 45: break;
        case 24:
          { return("\""); }
        case 46: break;
        case 23:
          { return("$"); }
        case 47: break;
        case 22:
          { return("["); }
        case 48: break;
        case 21:
          { return("("); }
        case 49: break;
        case 15:
          { return(")"); }
        case 50: break;
        case 16:
          { return("]"); }
        case 51: break;
        case 17:
          { return("%"); }
        case 52: break;
        case 18:
          { return("`"); }
        case 53: break;
        default:
          if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
            yy_atEOF = true;
              { return(null); }
          }
          else {
            yy_ScanError(YY_NO_MATCH);
          }
      }
    }
  }

  /**
   * Runs the scanner on input files.
   *
   * Every file named on the command line is scanned to its end; the
   * tokens returned by {@link #next()} are discarded. Each file is closed
   * again once it has been scanned or scanning has failed.
   *
   * @param argv   the command line, contains the filenames to run
   *               the scanner on.
   */
  public static void main(String argv[]) {
    if (argv.length == 0) {
      System.out.println("Usage : java PTB2TextLexer <inputfile>");
    }
    else {
      for (int i = 0; i < argv.length; i++) {
        PTB2TextLexer scanner = null;
        try {
          scanner = new PTB2TextLexer( new java.io.FileReader(argv[i]) );
          while ( !scanner.yy_atEOF ) scanner.next();
        }
        catch (java.io.FileNotFoundException e) {
          System.out.println("File not found : \""+argv[i]+"\"");
        }
        catch (java.io.IOException e) {
          System.out.println("IO error scanning file \""+argv[i]+"\"");
          System.out.println(e);
        }
        catch (Exception e) {
          System.out.println("Unexpected exception:");
          e.printStackTrace();
        }
        finally {
          // release the file handle even when scanning failed
          if (scanner != null) {
            try {
              scanner.yyclose();
            }
            catch (java.io.IOException e) {
              System.out.println("IO error closing file \""+argv[i]+"\"");
              System.out.println(e);
            }
          }
        }
      }
    }
  }

}