/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved.
*
* Oracle and Java are registered trademarks of Oracle and/or its affiliates.
* Other names may be trademarks of their respective owners.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common
* Development and Distribution License("CDDL") (collectively, the
* "License"). You may not use this file except in compliance with the
* License. You can obtain a copy of the License at
* http://www.netbeans.org/cddl-gplv2.html
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
* specific language governing permissions and limitations under the
* License. When distributing the software, include this License Header
* Notice in each file and include the License file at
* nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the GPL Version 2 section of the License file that
* accompanied this code. If applicable, add the following below the
* License Header, with the fields enclosed by brackets [] replaced by
* your own identifying information:
* "Portions Copyrighted [year] [name of copyright owner]"
*
* Contributor(s):
*
* The Original Software is NetBeans. The Initial Developer of the Original
* Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
* Microsystems, Inc. All Rights Reserved.
*
* If you wish your version of this file to be governed by only the CDDL
* or only the GPL Version 2, indicate your decision by adding
* "[Contributor] elects to include this software in this distribution
* under the [CDDL or GPL Version 2] license." If you do not indicate a
* single choice of license, a recipient has the option to distribute
* your version of this file under either the CDDL, the GPL Version 2 or
* to extend the choice of license to its licensees as provided above.
* However, if you add GPL Version 2 code and therefore, elected the GPL
* Version 2 license, then the option applies only if the new code is
* made subject to such option by the copyright holder.
*/
package org.netbeans.modules.ruby.lexer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.prefs.BackingStoreException;
import java.util.prefs.NodeChangeEvent;
import java.util.prefs.NodeChangeListener;
import java.util.prefs.PreferenceChangeEvent;
import java.util.prefs.PreferenceChangeListener;
import java.util.prefs.Preferences;
import org.netbeans.api.lexer.Token;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;
import org.openide.ErrorManager;
import org.openide.util.NbPreferences;
/**
 * Lexical analyzer for Ruby comments. It splits comment text into plain text,
 * TODO-style task markers, RDoc directives such as <code>:nodoc:</code>,
 * method links (<code>Class#method</code>), URL links and inline markup
 * (<code>_italic_</code>, <code>*bold*</code>, <code>+typewriter+</code> and
 * HTML-like tags).
 *
 * @todo Handle rdoc on/off directives (#++,#--). Since these occur on separate
 *   lines I can't handle it now.
 * @todo Only the names listed in RDOC_DIRECTIVES are highlighted as directives.
 *   Decide whether every :\w+: sequence should be highlighted instead.
 * @todo ___ shows up as an italic "_" - that is not right
 * @todo Tokenize Ruby-style symbols (:foo) and use the ruby color preferences?
 *
 * @author Tor Norbye
 */
public final class RubyCommentLexer implements Lexer<RubyCommentTokenId> {
    private static final int EOF = LexerInput.EOF;

    /** Directive names that are recognized when enclosed in colons, e.g. <code>:nodoc:</code>. */
    private static final String[] RDOC_DIRECTIVES =
        {
            "arg", "args", "yield", "yields", "notnew", "not-new", "not_new", "doc", "nodoc",
            "stopdoc", "startdoc", "enddoc", "main", "title", "section", "include"
        };

    private final LexerInput input;
    private final TokenFactory<RubyCommentTokenId> tokenFactory;

    /** Cached task markers; reset to null when the backing preferences change. Guarded by "this". */
    private String[] markers;

    /** Preferences node the marker listener is registered on, or null if none. Guarded by "this". */
    private Preferences markerPreferences;

    /** Listener that invalidates {@link #markers}; registered at most once. Guarded by "this". */
    private PreferenceChangeListener markerListener;

    /**
     * Creates a lexer reading from the input and producing tokens through the
     * token factory supplied by <code>info</code>.
     *
     * @param info restart information; its state is expected to be null
     */
    public RubyCommentLexer(LexerRestartInfo<RubyCommentTokenId> info) {
        this.input = info.input();
        this.tokenFactory = info.tokenFactory();
        assert (info.state() == null); // passed argument always null
    }

    /**
     * This lexer keeps no state between tokens.
     *
     * @return always null
     */
    public Object state() {
        return null;
    }

    /**
     * Compute the set of markers to scan for in the user source code.
     * The code tries to look for the same markers used by the TODO module
     * in case the user has customized the set. (However, it is doing this
     * by peeking at the Preferences possibly left by the docscan module,
     * rather than having a contract API with it, based on
     * tasklist/docscan/src/org/netbeans/modules/tasklist/docscan/Settings.java)
     * <p>
     * The result is cached until the preferences node reports a change.
     * When no non-blank pattern is stored, a built-in default set is used.
     *
     * @return the markers to highlight; never null, every element non-blank
     */
    private synchronized String[] getTodoMarkers() {
        if (markers == null) {
            final String TODO_MARKERS_KEY = "patterns"; // NOI18N

            if (markerListener == null) {
                // Register exactly one listener per lexer; it is removed again in release()
                Preferences preferences =
                    NbPreferences.root().node("/org/netbeans/modules/tasklist/todo"); // NOI18N
                PreferenceChangeListener listener = new PreferenceChangeListener() {
                    @Override
                    public void preferenceChange(PreferenceChangeEvent evt) {
                        synchronized (RubyCommentLexer.this) {
                            markers = null;
                        }
                    }
                };
                preferences.addPreferenceChangeListener(listener);
                markerPreferences = preferences;
                markerListener = listener;
            }

            // Splitting an empty string yields a single empty element, so blank
            // entries have to be dropped before deciding whether anything is configured.
            List<String> markerList = new ArrayList<String>();
            for (String candidate : markerPreferences.get(TODO_MARKERS_KEY, "").split("\\|")) {
                if (!candidate.trim().isEmpty()) {
                    markerList.add(candidate);
                }
            }

            if (!markerList.isEmpty()) {
                // "@todo" applies to javadoc, and these tags are now colorized separately
                while (markerList.remove("@todo")) {
                    // keep removing until no occurrence is left
                }
                markers = markerList.toArray(new String[markerList.size()]);
            } else {
                // Additional candidates: HACK, WORKAROUND, REMOVE, OLD
                markers = new String[] { "TODO", "FIXME", "XXX", "PENDING" }; // NOI18N
            }
        }

        return markers;
    }

    /**
     * Returns the preferences node "org/netbeans/modules/tasklist/docscan"
     * below the NetBeans preferences root.
     *
     * @return the docscan preferences node
     */
    public Preferences getDocscanPreferences() {
        return NbPreferences.root().node("org/netbeans/modules/tasklist/docscan");
    }

    /**
     * Reads the characters of <code>text</code> starting at index <code>from</code>
     * from the input. If a character differs, everything read by this call
     * (including the differing character or EOF) is backed up again.
     *
     * @param text the expected text
     * @param from index of the first character of <code>text</code> to compare
     * @return true if all remaining characters matched and are now consumed
     */
    private boolean matchesAhead(String text, int from) {
        for (int i = from, n = text.length(); i < n; i++) {
            if (input.read() != text.charAt(i)) {
                input.backup(i - from + 1);
                return false;
            }
        }
        return true;
    }

    /**
     * Produces the next token of the comment.
     * <p>
     * Plain text is accumulated until a special construct starts; the text
     * read so far is then returned first and the construct is recognized by
     * the following call.
     *
     * @return the next token, or null when the input is exhausted
     */
    public Token<RubyCommentTokenId> nextToken() {
        // True while the previously read character could be part of an identifier;
        // markup and markers are only recognized at the start of a word.
        boolean inWord = false;

        inputLoop:
        while (true) {
            int ch = input.read();

            switch (ch) {
            case EOF: {
                if (input.readLength() > 0) {
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                } else {
                    return null;
                }
            }

            case '\\':
                // The next character is escaped...
                input.read();
                continue;

            case '\n':
                return token(RubyCommentTokenId.COMMENT_TEXT);

            case '#': { // Linked method
                // See if this is a method reference. It can be either "#method" or "Class#method".
                // If the input is something like " #" we need to chop it off to start at "#"; if
                // it's something like "foo Bar#baz" we need to chop it off at "Bar#baz", and
                // if it's something impossible like " foo#bar" we can ignore it completely (the class
                // must be uppercase).
                CharSequence s = input.readText();
                int classIndex = s.length() - 1;
                assert s.charAt(classIndex) == '#';

                // Walk backwards to the first character that cannot be part of a class name
                for (classIndex--; classIndex >= 0; classIndex--) {
                    char c = s.charAt(classIndex);
                    if (!Character.isJavaIdentifierPart(c) && c != '_' && c != ':') {
                        // The next character needs to be "#" or an uppercase character
                        assert classIndex < s.length() - 1;
                        char next = s.charAt(classIndex + 1);
                        if (!(next == '#' || Character.isUpperCase(next))) {
                            // This "#" is not in an Upper# sequence
                            // just continue processing input
                            continue inputLoop;
                        }
                        break;
                    }
                }

                // Make sure uppercase
                if (classIndex == -1) {
                    // It's the beginning of input - we're okay
                    char next = s.charAt(0);
                    if (!(next == '#' || Character.isUpperCase(next))) {
                        break;
                    }
                } else {
                    // Return the text preceding the reference first
                    input.backup(input.readLength() - (classIndex + 1));
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                }

                int originalLength = input.readLength();

                // See if we have what looks like a method name:
                // method-only characters followed by whitespace, newlines or EOF:
                boolean seenSuffixChar = false;
                while (ch != EOF) {
                    ch = input.read();
                    if (ch == '$' || ch == '@') {
                        // TODO - what do I do here?
                        // Accepted as part of the name; nothing is recorded for it.
                    } else if (ch == '?' || ch == '=' || ch == '!') {
                        seenSuffixChar = true;
                    } else if (ch == ':' || Character.isJavaIdentifierPart(ch)) {
                        if (seenSuffixChar) {
                            // These are only allowed at the end
                            break;
                        }
                        continue;
                    } else {
                        input.backup(1);
                        break;
                    }
                }

                if (Character.isWhitespace(ch) || (ch == EOF) || (ch == '.') || (ch == ',') ||
                        (ch == ')') || (ch == '}') || (ch == '(')) {
                    if (input.readLength() > 2 && input.readLength() > originalLength) {
                        return token(RubyCommentTokenId.COMMENT_LINK);
                    }
                }
                break;
            }

            case 'f': // ftp:
            case 'm': // mailto:
            case 'w': // www.
            case 'h': { // http links. TODO: link:
                if (inWord) {
                    break;
                }

                int originalLength = input.readLength();
                boolean foundLinkBegin = false;

                if (ch == 'h') { // http: or https:
                    if (matchesAhead("ttp", 0)) {
                        int next = input.read();
                        if (next == ':') {
                            foundLinkBegin = true;
                        } else if (next == 's') {
                            if (input.read() == ':') {
                                foundLinkBegin = true;
                            } else {
                                input.backup(5); // "ttps" plus the mismatching character
                            }
                        } else {
                            input.backup(4); // "ttp" plus the mismatching character
                        }
                    }
                } else if (ch == 'f') { // ftp:
                    foundLinkBegin = matchesAhead("tp:", 0);
                } else if (ch == 'm') { // mailto:
                    foundLinkBegin = matchesAhead("ailto:", 0);
                } else if (ch == 'w') { // www.
                    foundLinkBegin = matchesAhead("ww.", 0);
                }

                if (foundLinkBegin) {
                    // The link extends up to the next delimiter or the end of input
                    while (ch != EOF) {
                        ch = input.read();
                        if ((ch == ']') || (ch == ')') || Character.isWhitespace(ch) ||
                                (ch == '\'') || (ch == '"')) {
                            input.backup(1);
                            break;
                        }
                    }

                    if (originalLength > 1) {
                        // Text precedes the link: return that text first and
                        // recognize the link again on the next call
                        input.backup(input.readLengthEOF() - originalLength + 1);
                        return token(RubyCommentTokenId.COMMENT_TEXT);
                    }

                    if (input.readLength() > 2) {
                        return token(RubyCommentTokenId.COMMENT_LINK);
                    }
                }
                break;
            }

            case '_': // Italic text
                if (inWord) {
                    break;
                }
                if (input.readLength() > 1) {
                    input.backup(1);
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                }
                while (ch != EOF) {
                    ch = input.read();
                    if (ch == '_') {
                        // Peek: an underscore followed by a letter or underscore is inside the word
                        int next = input.read();
                        input.backup(1);
                        if (Character.isLetter(next) || (next == '_')) {
                            continue;
                        }
                        if (input.readLength() > 2) {
                            return token(RubyCommentTokenId.COMMENT_ITALIC);
                        }
                    } else if (!(Character.isLetter(ch) || (ch == '_'))) {
                        input.backup(1);
                        break;
                    }
                }
                break;

            case '*': // Bold text
                if (inWord) {
                    break;
                }
                if (input.readLength() > 1) {
                    input.backup(1);
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                }
                while (ch != EOF) {
                    ch = input.read();
                    if ((ch == '*') && (input.readLength() > 2)) {
                        return token(RubyCommentTokenId.COMMENT_BOLD);
                    } else if (!(Character.isLetter(ch) || (ch == '_'))) {
                        input.backup(1);
                        break;
                    }
                }
                break;

            case '+': // Typewriter text; reported with the HTML tag token id
                if (inWord) {
                    break;
                }
                if (input.readLength() > 1) {
                    input.backup(1);
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                }
                while (ch != EOF) {
                    ch = input.read();
                    if ((ch == '+') && (input.readLength() > 2)) {
                        return token(RubyCommentTokenId.COMMENT_HTMLTAG);
                    } else if (!(Character.isLetter(ch) || (ch == '_') || (ch == ':'))) { // ':' e.g. +::Module++
                        input.backup(1);
                        break;
                    }
                }
                break;

            case '<': { // Html tag - rdoc
                // Only accept things that look like tags: <foo> or </foo>, not
                // <<, < >, etc.
                int next = input.read();
                input.backup(1);
                if (!((next == '/') || Character.isLetter(next))) {
                    break;
                }
                if (input.readLength() > 1) {
                    input.backup(1);
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                }
                while (ch != EOF) {
                    ch = input.read();
                    if (ch == '\n') {
                        break;
                    } else if (ch == '>') {
                        return token(RubyCommentTokenId.COMMENT_HTMLTAG);
                    }
                }
                break;
            }

            case ':': { // Possible rdoc tag, like :nodoc:
                ch = input.read();
                if (ch == ':') {
                    // :: - possibly part of something like Foo::Bar
                    continue;
                } else {
                    input.backup(1);
                    if (input.readText().toString().endsWith("::")) {
                        continue;
                    }
                }
                if (input.readLength() > 1) {
                    input.backup(1);
                    return token(RubyCommentTokenId.COMMENT_TEXT);
                }

                // Number of characters read after the opening ':'
                int backup = 0;
                while (ch != EOF) {
                    ch = input.read();
                    backup++;
                    if ((ch == '\n') || (!Character.isLetter(ch) && (ch != '_') && (ch != '-'))) {
                        if ((ch == ':') && (input.readLength() > 2)) { // Don't recognize "::" since it's used a lot when mentioning modules
                            // I should be able to use input.readText(1, ...) here but it doesn't work right
                            String seen = input.readText().toString();
                            String directive = seen.substring(1, seen.length() - 1);
                            for (String keyword : RDOC_DIRECTIVES) {
                                if (keyword.equals(directive)) {
                                    return token(RubyCommentTokenId.COMMENT_RDOC);
                                }
                            }
                        }
                        // Not a known directive: rewind to just after the opening ':'
                        input.backup(backup);
                        break;
                    }
                }
                continue;
            }

            default: {
                if (!inWord) {
                    // See if we have a match from here on for any of the markers
                    String[] todoMarkers = getTodoMarkers();
                    for (int i = 0; i < todoMarkers.length; i++) {
                        String marker = todoMarkers[i];
                        if (marker.charAt(0) == ch) {
                            if (input.readLength() > 1) {
                                // Return the text preceding the candidate marker first
                                input.backup(1);
                                return token(RubyCommentTokenId.COMMENT_TEXT);
                            }

                            // Possible match!
                            // Read ahead while matching further characters, but if they
                            // stop matching, back up and try another
                            if (matchesAhead(marker, 1)) {
                                // Peek ahead and make sure this match is a whole word
                                boolean separate = !Character.isJavaIdentifierPart(input.read());
                                input.backup(1);
                                if (separate) {
                                    return tokenFactory.createToken(RubyCommentTokenId.COMMENT_TODO,
                                            input.readLength());
                                }
                                // Only a prefix of a longer word: rewind to just after the
                                // first character so that other markers are compared from
                                // the same position.
                                if (marker.length() > 1) {
                                    input.backup(marker.length() - 1);
                                }
                            }
                        }
                    }
                }
            }
            }

            inWord = Character.isJavaIdentifierPart(ch);
        }
    }

    /** Creates a token of the given id spanning the text read so far. */
    private Token<RubyCommentTokenId> token(RubyCommentTokenId id) {
        return tokenFactory.createToken(id);
    }

    /**
     * Unregisters the preference listener installed by this lexer, if any, so
     * that the shared preferences node no longer references this instance.
     */
    public synchronized void release() {
        if (markerListener != null) {
            try {
                markerPreferences.removePreferenceChangeListener(markerListener);
            } catch (IllegalStateException ignored) {
                // The preferences node has been removed; there is nothing to unregister from.
            }
            markerListener = null;
            markerPreferences = null;
        }
    }
}