StringLiteralProcessor.java example

Explorer
visage-compiler-master
/*
 * Copyright 2008-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package org.visage.tools.antlr;

import com.sun.tools.mjavac.util.Log;
import org.visage.tools.util.MsgSym;

/**
 * Convert escapes in string literals
 * 
 * @author Robert Field
 * 
 * Stolen liberally from the javac Scanner
 */

public class StringLiteralProcessor {

    private final int basepos;
    
    /** A character buffer for literals.
     */
    private char[] sbuf;
    private int sp;

    /** The input buffer, index of next chacter to be read,
     *  index of one past last character in buffer.
     */
    private final char[] buf;
    private int bp;
    private final int buflen;
    
    /** The current character.
     */
    private char ch;

    /** The buffer index of the last converted unicode character
     */
    private int unicodeConversionBp = -1;

    /** The log to be used for error reporting.
     */
    private final Log log;

    public static String convert(Log log, int pos, String str) {
        StringLiteralProcessor slp = new StringLiteralProcessor(log, pos, str);
        return slp.convert();
    }
    
    private StringLiteralProcessor(Log log, int pos, String str) {
        this.log = log;
        this.basepos = pos;
    	this.buf = str.toCharArray();
    	this.buflen = buf.length - 1;  // skip trailing quote
    	this.bp = 0; // scanChar pre-increments so we skip the leading quote
    	this.sbuf = new char[buflen];
    	this.sp = 0;
    }
    
    private String convert() {
        scanChar();
        while (bp < buflen) {
            scanLitChar();
        }
        return new String(sbuf, 0, sp);
    }

    /** Report an error at the given position using the provided arguments.
     */
    private void lexError(String key, Object... args) {
        log.error(basepos+bp-1-buflen, key, args);
    }

    /** Convert an ASCII digit from its base (8, 10, or 16)
     *  to its value.
     */
    private int digit(int base) {
        char c = ch;
        int result = Character.digit(c, base);
        if (result >= 0 && c > 0x7f) {
            lexError(MsgSym.MESSAGE_ILLEGAL_NONASCII_DIGIT);
            ch = "0123456789abcdef".charAt(result);
        }
        return result;
    }


    /** Append a character to sbuf.
     */
    private void putChar(char ch) {
        if (sp == sbuf.length) {
            char[] newsbuf = new char[sbuf.length * 2];
            System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
            sbuf = newsbuf;
        }
        sbuf[sp++] = ch;
    }

    /** Convert unicode escape; bp points to initial '\' character
     *  (Spec 3.3).
     */
    private void convertUnicode() {
        if (ch == '\\' && unicodeConversionBp != bp) {
            bp++; ch = buf[bp];
            if (ch == 'u') {
                do {
                    bp++; ch = buf[bp];
                } while (ch == 'u');
                int limit = bp + 3;
                if (limit < buflen) {
                    int d = digit(16);
                    int code = d;
                    while (bp < limit && d >= 0) {
                        bp++; ch = buf[bp];
                        d = digit(16);
                        code = (code << 4) + d;
                    }
                    if (d >= 0) {
                        ch = (char)code;
                        unicodeConversionBp = bp;
                        return;
                    }
                }
                lexError(MsgSym.MESSAGE_ILLEGAL_UNICODE_ESC);
            } else {
                bp--;
                ch = '\\';
            }
        }
    }

    /** Read next character.
     */
    private void scanChar() {
        ch = buf[++bp];
        if (ch == '\\') {
            convertUnicode();
        }
    }

    /** Read next character in character or string literal and copy into sbuf.
     */
    private void scanLitChar() {
        if (ch == '\\') {
            if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
                bp++;
                putChar('\\');
                scanChar();
            } else {
                scanChar();
                switch (ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    char leadch = ch;
                    int oct = digit(8);
                    scanChar();
                    if ('0' <= ch && ch <= '7') {
                        oct = oct * 8 + digit(8);
                        scanChar();
                        if (leadch <= '3' && '0' <= ch && ch <= '7') {
                            oct = oct * 8 + digit(8);
                            scanChar();
                        }
                    }
                    putChar((char)oct);
                    break;
                case 'b':
                    putChar('\b'); scanChar(); break;
                case 't':
                    putChar('\t'); scanChar(); break;
                case 'n':
                    putChar('\n'); scanChar(); break;
                case 'f':
                    putChar('\f'); scanChar(); break;
                case 'r':
                    putChar('\r'); scanChar(); break;
                case '\'':
                    putChar('\''); scanChar(); break;
                case '\"':
                    putChar('\"'); scanChar(); break;
                case '{':
                    putChar('{'); scanChar(); break;
                case '}':
                    putChar('}'); scanChar(); break;
                case '\\':
                    putChar('\\'); scanChar(); break;
                default:
                    lexError(MsgSym.MESSAGE_ILLEGAL_ESC_CHAR);
                }
            }
        } else if (bp != buflen) {
            putChar(ch); scanChar();
        }
    }
}