LexerSource.java example

Explorer

dltk.ruby-master
- plugins
- tests
  - org.eclipse.dltk.ruby.core.tests
    - src
      - org
        eclipse
        dltk
        ruby
        core
        tests
        Activator.java
        AllTests.java
        assist
        RubySelectionTests.java
        parser
        AbstractASTTest.java
        AbstractRubyParserTests.java
        JRuby1RubyParserTests.java
        RubyParserRecoveryTests.java
        RubyParserTests.java
        SimpleRubyParserTests.java
        StdlibRubyParserTests.java
        jruby
        AST2StringVisitor.java
        ParserSuite.java
        ZippedParserSuite.java
        resources
        RubyResourcesTests.java
        rewriter
        RewriterTests.java
        search
        RubyFullNameSearchTests.java
        RubyMethodSearchTests.java
        RubySearchTests.java
        mixin
        IAssertion.java
        MixinModelManipulationTests.java
        MixinProjectIsolationTests.java
        MixinTestsSuite.java
        RubyMixinClassTests.java
        ThreadedMixinTests.java
        text
        completion
        RubyCompletionTests.java
        typehierarchy
        TypeHierarchyTests.java
        typeinference
        AbstractTypeInferencingTests.java
        IAssertion.java
        MethodsTest.java
        SimpleTest.java
        StatementsTest.java
        TypeInferenceSuite.java
        TypeInferenceTest.java
        VariablesTest.java
        utils
        LocalVariablesInspectionTest.java
  - org.eclipse.dltk.ruby.debug.tests
    - src
      - org
        eclipse
        dltk
        ruby
        debug
        tests
        Activator.java
        AllTests.java
        console
        RubyFilenameLinenumberTests.java
        launching
        RubyLaunchingTests.java
  - org.eclipse.dltk.ruby.formatter.tests
    - src
      - org
        eclipse
        dltk
        ruby
        formatter
        tests
        AbstractRubyFormatterTest.java
        AllTests.java
        BeginTest.java
        BlocksTest.java
        ClassesAndMethodsTest.java
        CommentWrapTests.java
        CommentsTest.java
        DoTest.java
        FormatRubyLibTest.java
        HereDocumentTest.java
        IfTest.java
        ModulesTest.java
        ParserTest.java
        RDocTest.java
        RegexpTest.java
        RubyDoc1Test.java
        RubyDoc2Test.java
        RubyFormatterTestsPlugin.java
        SimpleTests.java
        StringsTest.java
        TestRubyFormatter.java
        UnaryPlusTest.java
  - org.eclipse.dltk.ruby.testing.tests
    - src
      - org
        eclipse
        dltk
        ruby
        testing
        tests
        AllTests.java
        RubyTestingTestUnitResolverTests.java
  - org.eclipse.dltk.ruby.ui.tests
    - src
      - org
        eclipse
        dltk
        ruby
        ui
        tests
        AllTests.java
        folding
        RubyCommentElementTests.java
        RubyFoldingTest.java
        indenting
        RubyAutoIndentStrategyTest.java
        internal
        RubyUITestsPlugin.java
        TestUtils.java
        search
        MixinCompleteTests.java
        ThreadedUIMixinTests.java
        text
        PartitioningTest.java
        RubyRequireHyperlinkDetectorTest.java
        indenting
        IndentingTest.java
        jface
        text
        DocCmd.java

/***** BEGIN LICENSE BLOCK *****
 * Version: CPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Common Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/cpl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
 * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
 * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the CPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the CPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby.lexer.yacc;

import java.io.IOException;
import java.io.Reader;

import org.jruby.util.ByteList;

/**
 * This class is what feeds the lexer.  It is primarily a wrapper around a
 * Reader that can unread() data back onto the source.  Originally, I thought
 * about using the PushBackReader to handle read/unread, but I realized that
 * some extremely pathological case could overflow the pushback buffer.  Better
 * safe than sorry.  I could have combined this implementation with a 
 * PushbackBuffer, but the added complexity did not seem worth it.
 * 
 * <p>Line endings are normalized while reading: both "\r\n" and a lone "\r"
 * are handed to the caller as a single '\n'.  End of input is reported as the
 * NUL character ('\0').
 * 
 * @author enebo
 */
public class LexerSource {
    private static final int INITIAL_PUSHBACK_SIZE = 100;
    private static final int INITIAL_LINEWIDTH_SIZE = 2048;

    // Marks the raw look-ahead slot as empty.  Must differ from -1, which is
    // what Reader.read() returns at end of stream and may itself be stored.
    private static final int NO_LOOKAHEAD = -2;

    // Where we get new positions from.
    private final ISourcePositionFactory positionFactory;

    // Where we get our newest char's
    private final Reader reader;

    // Our readback/pushback buffer (used as a stack: last unread is first read).
    private char[] buf = new char[INITIAL_PUSHBACK_SIZE];

    // index of last character in pushback buffer (-1 when empty)
    private int bufLength = -1;

    // One raw value already pulled from the reader while deciding how to
    // translate a '\r', but not yet delivered.  It has not been counted in
    // offset/column, so it must not go through unread().
    private int lookahead = NO_LOOKAHEAD;

    // How long is every line we have run across.  This makes it possible for us to unread() 
    // past a read() line and still know what column we are at.
    private int[] lineWidths = new int[INITIAL_LINEWIDTH_SIZE];

    // index of last line width in line widths list (-1 when empty)
    private int lineWidthsLength = -1;

    // The name of this source (e.g. a filename: foo.rb)
    private final String sourceName;

    // Number of newlines read from the reader
    private int line = 0;

    // Column of source.  
    private int column = 0;

    // How many chars into the source are we?
    private int offset = 0;

    // Flag to let us know in next read after a newline that we should reset column
    private boolean nextCharIsOnANewLine = true;

    /**
     * Create our food-source for the lexer, using a default
     * {@link SourcePositionFactory} bound to this source.
     * 
     * @param sourceName is the file we are reading
     * @param reader is what represents the contents of file sourceName
     */
    public LexerSource(String sourceName, Reader reader) {
        this.sourceName = sourceName;
        this.reader = reader;
        this.positionFactory = new SourcePositionFactory(this);
    }

    /**
     * Create our food-source for the lexer with a caller supplied position factory.
     * 
     * @param sourceName is the file we are reading
     * @param reader is what represents the contents of file sourceName
     * @param factory is what getPosition() delegates to
     */
    public LexerSource(String sourceName, Reader reader, ISourcePositionFactory factory) {
        this.sourceName = sourceName;
        this.reader = reader;
        this.positionFactory = factory;
    }

    /**
     * Read next character from this source.  Characters that were pushed back
     * with unread() are returned first (most recently pushed first); after
     * that characters come from the underlying reader.
     * 
     * @return next character to be viewed by the source, or '\0' at end of input
     * @throws IOException if the underlying reader fails
     */
    public char read() throws IOException {
        boolean fromPushback = bufLength >= 0;
        char c;

        if (fromPushback) {
            c = buf[bufLength--];
        } else {
            c = wrappedRead();

            // EOF...Do not advance position...Go straight to jail
            if (c == '\0') {
                return c;
            }
        }

        // Reset column back to zero on first read of a line (note it will be-
        // come '1' by the time it leaves read().
        if (nextCharIsOnANewLine) {
            nextCharIsOnANewLine = false;
            column = 0;
        }

        offset++;
        column++;

        if (c == '\n') {
            line++;

            // Only a newline coming straight from the reader is seen for the
            // first time; remember how wide the line it terminates was.
            if (!fromPushback) {
                lineWidths[++lineWidthsLength] = column;

                // If we outgrow our line width list then grow it
                if (lineWidthsLength + 1 == lineWidths.length) {
                    int[] grown = new int[lineWidths.length + INITIAL_LINEWIDTH_SIZE];
                    System.arraycopy(lineWidths, 0, grown, 0, lineWidths.length);
                    lineWidths = grown;
                }
            }

            nextCharIsOnANewLine = true;
        }

        return c;
    }

    /**
     * Pushes char back onto this source.  Note, this also
     * allows us to push whatever is passed back into the source.
     * The NUL character (our EOF marker) is silently ignored.
     * 
     * @param c to be put back onto the source
     */
    public void unread(char c) {
        if (c == '\0') {
            return;
        }

        offset--;

        if (c == '\n') {
            line--;
            // Restore the recorded width of the line we are stepping back onto.
            // A newline that was never read from the reader has no recorded
            // width; fall back to 0 rather than index outside the table.
            column = (line >= 0 && line <= lineWidthsLength) ? lineWidths[line] : 0;
            nextCharIsOnANewLine = true;
        } else {
            column--;
        }

        buf[++bufLength] = c;

        // If we outgrow our pushback stack then grow it (this should only happen in
        // pretty pathological cases).
        if (bufLength + 1 == buf.length) {
            char[] grown = new char[buf.length + INITIAL_PUSHBACK_SIZE];
            System.arraycopy(buf, 0, grown, 0, buf.length);
            buf = grown;
        }
    }

    /**
     * Check whether the next character equals the supplied one without
     * consuming it (it is read and immediately pushed back).
     * 
     * @param to the character to compare against
     * @return true if the next character is 'to'
     * @throws IOException if the underlying reader fails
     */
    public boolean peek(char to) throws IOException {
        char c = read();
        unread(c);
        return c == to;
    }

    /**
     * What file are we lexing?
     * @return the files name
     */
    public String getFilename() {
        return sourceName;
    }

    /**
     * What line are we at?
     * @return the number of newlines consumed so far (0 based)
     */
    public int getLine() {
        return line;
    }

    /**
     * What column are we at?
     * 
     * @return the column; 1 right after the first character of a line was read
     */
    public int getColumn() {
        return column;
    }

    /**
     * The location of the last char we read from the source.
     * 
     * @return current location of source, never negative
     */
    public int getOffset() {
        return Math.max(0, offset);
    }

    /**
     * Where is the reader within the source.  Delegates to the position factory.
     * 
     * @param startPosition passed through to the position factory
     * @param inclusive passed through to the position factory
     * @return the position produced by the factory
     */
    public ISourcePosition getPosition(ISourcePosition startPosition, boolean inclusive) {
        return positionFactory.getPosition(startPosition, inclusive);
    }

    /**
     * Where is the reader within the source.  Same as
     * getPosition(null, false).
     * 
     * @return the current position
     */
    public ISourcePosition getPosition() {
        return positionFactory.getPosition(null, false);
    }

    /**
     * @return the factory used to create positions for this source
     */
    public ISourcePositionFactory getPositionFactory() {
        return positionFactory;
    }

    /**
     * Fetch the next raw value: the pending look-ahead if there is one,
     * otherwise whatever the reader delivers (-1 at end of stream).
     */
    private int rawRead() throws IOException {
        if (lookahead != NO_LOOKAHEAD) {
            int pending = lookahead;
            lookahead = NO_LOOKAHEAD;
            return pending;
        }
        return reader.read();
    }

    /**
     * Read one character from the reader, normalizing line endings and
     * mapping end of stream to '\0'.
     * 
     * @return the current char or '\0' at EOF
     * @throws IOException if the underlying reader fails
     */
    private char wrappedRead() throws IOException {
        int c = rawRead();

        // If \r\n then just pass along \n (windows)
        // If \r[^\n] then pass along \n (MAC)
        if (c == '\r') {
            int next = rawRead();

            if (next == '\n') {
                // Position within source must reflect the actual offset and column.  Since
                // we ate an extra character here (this accounting is normally done in read
                // ), we should update position info.
                offset++;
                column++;
            } else {
                // Not ours to consume (may even be EOF).  Park it untouched so the
                // next raw read sees it: it was never counted, so it must not be
                // unread(), and a following '\r' still gets translated.
                lookahead = next;
            }
            c = '\n';
        }

        return c != -1 ? (char) c : '\0';
    }

    /**
     * Create a source.
     * 
     * @param name the name of the source (e.g a filename: foo.rb)
     * @param content the data of the source
     * @return the new source
     */
    public static LexerSource getSource(String name, Reader content) {
        return new LexerSource(name, content);
    }

    /**
     * Consume the rest of the current line.  The terminating newline (if any)
     * is consumed but not included in the result.
     * 
     * @return the characters up to the next newline or EOF
     * @throws IOException if the underlying reader fails
     */
    public String readLine() throws IOException {
        StringBuffer sb = new StringBuffer(80);
        for (char c = read(); c != '\n' && c != '\0'; c = read()) {
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Same as readLine() but collects the characters into a ByteList.
     * 
     * @return the characters up to the next newline or EOF
     * @throws IOException if the underlying reader fails
     */
    public ByteList readLineBytes() throws IOException {
        ByteList bytelist = new ByteList(80);

        for (char c = read(); c != '\n' && c != '\0'; c = read()) {
            bytelist.append(c);
        }
        return bytelist;
    }

    /**
     * Push a whole sequence back so that a subsequent series of read() calls
     * returns it in its original order (the pushback buffer is a stack, hence
     * the characters are pushed last-to-first).
     * 
     * @param buffer the characters to push back
     */
    public void unreadMany(CharSequence buffer) {
        for (int i = buffer.length() - 1; i >= 0; i--) {
            unread(buffer.charAt(i));
        }
    }

    /**
     * Try to consume 'match' from the source.  On success the matched text
     * (and any skipped leading whitespace) stays consumed; on failure
     * everything that was read is pushed back.
     * 
     * @param match the text expected next
     * @param indent if true, leading whitespace is skipped before matching
     * @return true if the source continued with 'match'
     * @throws IOException if the underlying reader fails
     */
    public boolean matchString(String match, boolean indent) throws IOException {
        int length = match.length();
        StringBuffer consumed = new StringBuffer(length + 20);

        if (indent) {
            for (char c = read(); c != '\0'; c = read()) {
                if (!Character.isWhitespace(c)) {
                    unread(c);
                    break;
                }
                consumed.append(c);
            }
        }

        for (int i = 0; i < length; i++) {
            char c = read();
            consumed.append(c);
            if (match.charAt(i) != c) {
                unreadMany(consumed);
                return false;
            }
        }
        return true;
    }

    /**
     * @return true if the column is 1, i.e. exactly one character of the
     *         current line has been read
     */
    public boolean wasBeginOfLine() {
        return getColumn() == 1;
    }

    /**
     * Read the part of an escape sequence that follows the backslash and
     * translate it to the character it stands for.
     * 
     * @return the translated character
     * @throws IOException if the underlying reader fails
     * @throws SyntaxException if the escape sequence is incomplete or malformed
     */
    public char readEscape() throws IOException {
        char c = read();

        switch (c) {
            case '\\' : // backslash
                return c;
            case 'n' : // newline
                return '\n';
            case 't' : // horizontal tab
                return '\t';
            case 'r' : // carriage return
                return '\r';
            case 'f' : // form feed
                return '\f';
            case 'v' : // vertical tab
                return '\u000B';
            case 'a' : // alarm(bell)
                return '\u0007';
            case 'e' : // escape
                return '\u001B';
            case '0' : case '1' : case '2' : case '3' : // octal constant
            case '4' : case '5' : case '6' : case '7' :
                // The digit we just read is part of the constant.
                unread(c);
                return scanOct(3);
            case 'x' : { // hex constant
                int columnBefore = getColumn();
                char hexValue = scanHex(2);

                // No hex value after the 'x' (nothing was consumed).
                if (columnBefore == getColumn()) {
                    throw invalidEscape();
                }
                return hexValue;
            }
            case 'b' : // backspace
                return '\010';
            case 's' : // space
                return ' ';
            case 'M' : // meta: \M-x sets the high bit
                if ((c = read()) != '-') {
                    throw invalidEscape();
                } else if ((c = read()) == '\\') {
                    return (char) (readEscape() | 0x80);
                } else if (c == '\0') {
                    throw invalidEscape();
                }
                return (char) ((c & 0xff) | 0x80);
            case 'C' : // control: \C-x, then handled exactly like \cx
                if ((c = read()) != '-') {
                    throw invalidEscape();
                }
                // intentional fall through
            case 'c' :
                if ((c = read()) == '\\') {
                    c = readEscape();
                } else if (c == '?') {
                    // \c? is DEL: octal 0177 == 0x7F (not code point U+0177).
                    return '\u007F';
                } else if (c == '\0') {
                    throw invalidEscape();
                }
                return (char) (c & 0x9f);
            case '\0' : // EOF right after the backslash
                throw invalidEscape();
            default :
                return c;
        }
    }

    /**
     * Build the exception reported for every malformed escape sequence,
     * positioned at the current location.
     */
    private SyntaxException invalidEscape() {
        return new SyntaxException(getPosition(), "Invalid escape character syntax");
    }

    /**
     * Read up to 'count' hex digits and combine them into one value.  Stops at
     * (and pushes back) the first character that is not a hex digit.
     */
    private char scanHex(int count) throws IOException {
        char value = '\0';

        for (int i = 0; i < count; i++) {
            char c = read();

            if (!RubyYaccLexer.isHexChar(c)) {
                unread(c);
                break;
            }

            value <<= 4;
            value |= Character.digit(c, 16) & 15;
        }

        return value;
    }

    /**
     * Read up to 'count' octal digits and combine them into one value.  Stops
     * at (and pushes back) the first character that is not an octal digit.
     */
    private char scanOct(int count) throws IOException {
        char value = '\0';

        for (int i = 0; i < count; i++) {
            char c = read();

            if (!RubyYaccLexer.isOctChar(c)) {
                unread(c);
                break;
            }

            value <<= 3;
            value |= Character.digit(c, 8) & 7;
        }

        return value;
    }

    /**
     * Get character ahead of current position by offset positions.  The
     * source is left as it was: everything read is pushed back again.
     * 
     * @param anOffset is location past current position to get char at (1 is
     *        the very next character)
     * @return character index positions ahead of source location or EOF
     * @throws IOException if the underlying reader fails
     */
    public char getCharAt(int anOffset) throws IOException {
        // Nothing to look at.
        if (anOffset <= 0) {
            return '\0';
        }

        StringBuffer peeked = new StringBuffer(anOffset);

        // read next offset chars
        for (int i = 0; i < anOffset; i++) {
            peeked.append(read());
        }

        // Push chars back now that we found it.  This has to happen last-to-
        // first because the pushback buffer is a stack; unreadMany does that
        // (and skips any EOF markers we collected).
        unreadMany(peeked);

        return peeked.charAt(peeked.length() - 1);
    }

    /**
     * Debugging aid: shows up to the next 20 characters of the source followed
     * by " ...".  The characters are pushed back afterwards.
     * 
     * @return a preview of the upcoming input, never null
     */
    public String toString() {
        StringBuffer preview = new StringBuffer(24);

        try {
            for (int i = 0; i < 20; i++) {
                char c = read();
                if (c == '\0') {
                    break;
                }
                preview.append(c);
            }
        } catch (IOException e) {
            // Best effort only: show whatever could be read before the failure.
        } finally {
            // Always hand back what we took, even if a read failed half way.
            unreadMany(preview);
        }

        return preview.append(" ...").toString();
    }
}