/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved. * * Oracle and Java are registered trademarks of Oracle and/or its affiliates. * Other names may be trademarks of their respective owners. * * The contents of this file are subject to the terms of either the GNU * General Public License Version 2 only ("GPL") or the Common * Development and Distribution License("CDDL") (collectively, the * "License"). You may not use this file except in compliance with the * License. You can obtain a copy of the License at * http://www.netbeans.org/cddl-gplv2.html * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the * specific language governing permissions and limitations under the * License. When distributing the software, include this License Header * Notice in each file and include the License file at * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the GPL Version 2 section of the License file that * accompanied this code. If applicable, add the following below the * License Header, with the fields enclosed by brackets [] replaced by * your own identifying information: * "Portions Copyrighted [year] [name of copyright owner]" * * Contributor(s): * * The Original Software is NetBeans. The Initial Developer of the Original * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun * Microsystems, Inc. All Rights Reserved. * * If you wish your version of this file to be governed by only the CDDL * or only the GPL Version 2, indicate your decision by adding * "[Contributor] elects to include this software in this distribution * under the [CDDL or GPL Version 2] license." If you do not indicate a * single choice of license, a recipient has the option to distribute * your version of this file under either the CDDL, the GPL Version 2 or * to extend the choice of license to its licensees as provided above. * However, if you add GPL Version 2 code and therefore, elected the GPL * Version 2 license, then the option applies only if the new code is * made subject to such option by the copyright holder. */ package org.netbeans.modules.ruby.lexer; import org.netbeans.api.lexer.Token; import org.netbeans.spi.lexer.Lexer; import org.netbeans.spi.lexer.LexerInput; import org.netbeans.spi.lexer.LexerRestartInfo; import org.netbeans.spi.lexer.TokenFactory; /** * Lexical analyzer for Ruby quoted Strings * * @author Tor Norbye * @version 1.00 */ public final class RubyStringLexer implements Lexer<RubyStringTokenId> { private static final int EOF = LexerInput.EOF; private final LexerInput input; private final TokenFactory<RubyStringTokenId> tokenFactory; private final boolean substituting; /** * A Lexer for ruby strings * @param substituting If true, handle substitution rules for double quoted strings, otherwise * single quoted strings. */ public RubyStringLexer(LexerRestartInfo<RubyStringTokenId> info, boolean substituting) { this.input = info.input(); this.tokenFactory = info.tokenFactory(); this.substituting = substituting; assert (info.state() == null); // passed argument always null } public Object state() { return null; } public Token<RubyStringTokenId> nextToken() { return substituting ? nextTokenDoubleQuotes() : nextTokenSingleQuotes(); } public Token<RubyStringTokenId> nextTokenSingleQuotes() { while (true) { int ch = input.read(); switch (ch) { case EOF: if (input.readLength() > 0) { return token(RubyStringTokenId.STRING_TEXT); } else { return null; } case '\\': if (input.readLength() > 1) { // already read some text input.backup(1); return tokenFactory.createToken(RubyStringTokenId.STRING_TEXT, input.readLength()); } switch (ch = input.read()) { case '\\': case '\'': return token(RubyStringTokenId.STRING_ESCAPE); case '0': // \0 etc. is a common construction when dealing with regexps. case '1': // It's not likely people will be confused about these case '2': // not getting escaped. case '3': case '4': case '5': case '6': case '7': case '8': case '9': return token(RubyStringTokenId.STRING_TEXT); default: return token(RubyStringTokenId.STRING_INVALID); } } } } public Token<RubyStringTokenId> nextTokenDoubleQuotes() { while (true) { int ch = input.read(); switch (ch) { case EOF: if (input.readLength() > 0) { return token(RubyStringTokenId.STRING_TEXT); } else { return null; } // #{code} = Value of code case '#': int f = input.read(); if (f == '{') { //if (input.read() == '{') { if (input.readLength() > 2) { // already read some text input.backup(2); return tokenFactory.createToken(RubyStringTokenId.STRING_TEXT, input.readLength()); } // Look for matching }... // TODO: Figure out if I need to do anything else here, // e.g. avoid escapes and such int c; while (true) { c = input.read(); if ((c == EOF) || (c == '}')) { break; } } return token(RubyStringTokenId.EMBEDDED_RUBY); } else { continue; } case '\\': if (input.readLength() > 1) { // already read some text input.backup(1); return tokenFactory.createToken(RubyStringTokenId.STRING_TEXT, input.readLength()); } switch (ch = input.read()) { // In general, \x = x // Thus, just special case out the exceptions // Hex escape: \xnn = Hex nn case 'x': if (isHexDigit(input.read())) { if (isHexDigit(input.read())) { return token(RubyStringTokenId.STRING_ESCAPE); // valid unicode } else { input.backup(2); } } else { input.backup(1); } break; // Octal escape: \nnn = Octal nnn case '0': case '1': case '2': case '3': switch (input.read()) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': switch (input.read()) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return token(RubyStringTokenId.STRING_ESCAPE); // valid octal escape } input.backup(1); continue; } input.backup(1); continue; // Just a \0 etc -> 0 // \cx = Control-x case 'c': { // If the next character is x, or -x, then it's a single sequence int next = input.read(); if (next == 'x') { return token(RubyStringTokenId.STRING_ESCAPE); } else { input.backup(1); } continue; } // \C-x = Control-x case 'C': { int next = input.read(); if (next == '-') { next = input.read(); if (next == 'x') { return token(RubyStringTokenId.STRING_ESCAPE); } else { input.backup(2); } } else { input.backup(1); } continue; } // \M-x = Meta-x case 'M': { int next = input.read(); if (next == '-') { next = input.read(); if (next == 'x') { return token(RubyStringTokenId.STRING_ESCAPE); } else { input.backup(2); } } else { input.backup(1); } continue; } // TODO // Meta-control-x: \M-\C-x //case 'M': // return; default: // There are lots of special escapes: \a, \b, \e, etc. // but we don't need to actually substitute these, since // lexically they have the same form as \x (which is = x), // so treat these all the same: return token(RubyStringTokenId.STRING_ESCAPE); } } } } private static boolean isHexDigit(int c) { return Character.isDigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); } private Token<RubyStringTokenId> token(RubyStringTokenId id) { return tokenFactory.createToken(id); } public void release() { } }