/* * Copyright 2003-2011 JetBrains s.r.o. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jetbrains.mps.ide.java; import com.intellij.lexer.LexerBase; import com.intellij.openapi.diagnostic.Logger; import com.intellij.psi.tree.IElementType; import com.intellij.util.text.CharArrayCharSequence; import com.intellij.util.text.CharArrayUtil; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; public class JavaLexer extends LexerBase { public JavaLexer() { this(true, true); } private JavaLexer(boolean isAssertKeywordEnabled, boolean isJDK15) { myTable = isAssertKeywordEnabled ? (isJDK15 ? ourTableWithAssertAndJDK15 : ourTableWithAssert) : (isJDK15 ? ourTableWithJDK15 : ourTableWithoutAssert); myFlexlexer = new _JavaLexer(isAssertKeywordEnabled, isJDK15); } private CharSequence myBuffer; private char[] myBufferArray; private int myBufferIndex; private int myBufferEndOffset; IElementType myTokenType; private _JavaLexer myFlexlexer; //Positioned after the last symbol of the current token private int myTokenEndOffset; private final static class HashTable { static final int NUM_ENTRIES = 999; private static final Logger LOG = Logger.getInstance("com.intellij.Lexer.JavaLexer"); final char[][] myTable = new char[NUM_ENTRIES][]; final IElementType[] myKeywords = new IElementType[NUM_ENTRIES]; void add(String s, IElementType tokenType) { char[] chars = s.toCharArray(); int hashCode = chars[0] * 2; for (int j = 1; j < chars.length; j++) { hashCode += chars[j]; } int modHashCode = hashCode % NUM_ENTRIES; LOG.assertTrue(myTable[modHashCode] == null); myTable[modHashCode] = chars; myKeywords[modHashCode] = tokenType; } boolean contains(int hashCode, final CharSequence buffer, final char[] bufferArray, int offset) { int modHashCode = hashCode % NUM_ENTRIES; final char[] kwd = myTable[modHashCode]; if (kwd == null) return false; if (bufferArray != null) { for (int j = 0; j < kwd.length; j++) { if (bufferArray[j + offset] != kwd[j]) return false; } } else { for (int j = 0; j < kwd.length; j++) { if (buffer.charAt(j + offset) != kwd[j]) return false; } } return true; } IElementType getTokenType(int hashCode) { return myKeywords[hashCode % NUM_ENTRIES]; } @SuppressWarnings({"HardCodedStringLiteral"}) public HashTable(boolean isAssertKeywordEnabled, boolean isJDK15) { if (isAssertKeywordEnabled) { add("assert", JavaTokenType.ASSERT_KEYWORD); } if (isJDK15) { add("enum", JavaTokenType.ENUM_KEYWORD); } add("abstract", JavaTokenType.ABSTRACT_KEYWORD); add("default", JavaTokenType.DEFAULT_KEYWORD); add("if", JavaTokenType.IF_KEYWORD); add("private", JavaTokenType.PRIVATE_KEYWORD); add("this", JavaTokenType.THIS_KEYWORD); add("boolean", JavaTokenType.BOOLEAN_KEYWORD); add("do", JavaTokenType.DO_KEYWORD); add("implements", JavaTokenType.IMPLEMENTS_KEYWORD); add("protected", JavaTokenType.PROTECTED_KEYWORD); add("throw", JavaTokenType.THROW_KEYWORD); add("break", JavaTokenType.BREAK_KEYWORD); add("double", JavaTokenType.DOUBLE_KEYWORD); add("import", JavaTokenType.IMPORT_KEYWORD); add("public", JavaTokenType.PUBLIC_KEYWORD); add("throws", JavaTokenType.THROWS_KEYWORD); add("byte", JavaTokenType.BYTE_KEYWORD); add("else", JavaTokenType.ELSE_KEYWORD); add("instanceof", JavaTokenType.INSTANCEOF_KEYWORD); add("return", JavaTokenType.RETURN_KEYWORD); add("transient", JavaTokenType.TRANSIENT_KEYWORD); add("case", JavaTokenType.CASE_KEYWORD); add("extends", JavaTokenType.EXTENDS_KEYWORD); add("int", JavaTokenType.INT_KEYWORD); add("short", JavaTokenType.SHORT_KEYWORD); add("try", JavaTokenType.TRY_KEYWORD); add("catch", JavaTokenType.CATCH_KEYWORD); add("final", JavaTokenType.FINAL_KEYWORD); add("interface", JavaTokenType.INTERFACE_KEYWORD); add("static", JavaTokenType.STATIC_KEYWORD); add("void", JavaTokenType.VOID_KEYWORD); add("char", JavaTokenType.CHAR_KEYWORD); add("finally", JavaTokenType.FINALLY_KEYWORD); add("long", JavaTokenType.LONG_KEYWORD); add("strictfp", JavaTokenType.STRICTFP_KEYWORD); add("volatile", JavaTokenType.VOLATILE_KEYWORD); add("class", JavaTokenType.CLASS_KEYWORD); add("float", JavaTokenType.FLOAT_KEYWORD); add("native", JavaTokenType.NATIVE_KEYWORD); add("super", JavaTokenType.SUPER_KEYWORD); add("while", JavaTokenType.WHILE_KEYWORD); add("const", JavaTokenType.CONST_KEYWORD); add("for", JavaTokenType.FOR_KEYWORD); add("new", JavaTokenType.NEW_KEYWORD); add("switch", JavaTokenType.SWITCH_KEYWORD); add("continue", JavaTokenType.CONTINUE_KEYWORD); add("goto", JavaTokenType.GOTO_KEYWORD); add("package", JavaTokenType.PACKAGE_KEYWORD); add("synchronized", JavaTokenType.SYNCHRONIZED_KEYWORD); add("true", JavaTokenType.TRUE_KEYWORD); add("false", JavaTokenType.FALSE_KEYWORD); add("null", JavaTokenType.NULL_KEYWORD); } } private final HashTable myTable; private final static HashTable ourTableWithoutAssert = new HashTable(false, false); private final static HashTable ourTableWithAssert = new HashTable(true, false); private final static HashTable ourTableWithAssertAndJDK15 = new HashTable(true, true); private final static HashTable ourTableWithJDK15 = new HashTable(false, true); @Override public final void start(CharSequence buffer, int startOffset, int endOffset, int initialState) { myBuffer = buffer; myBufferArray = CharArrayUtil.fromSequenceWithoutCopying(buffer); myBufferIndex = startOffset; myBufferEndOffset = endOffset; myTokenType = null; myTokenEndOffset = startOffset; myFlexlexer.reset(myBuffer, startOffset, endOffset, 0); } public final void start(char[] buffer, int startOffset, int endOffset, int initialState) { start(new CharArrayCharSequence(buffer), startOffset, endOffset, initialState); } @Override public int getState() { return 0; } @Override public final IElementType getTokenType() { locateToken(); return myTokenType; } @Override public final int getTokenStart() { return myBufferIndex; } @Override public final int getTokenEnd() { locateToken(); return myTokenEndOffset; } @Override public final void advance() { locateToken(); myTokenType = null; } protected final void locateToken() { if (myTokenType != null) return; _locateToken(); } private void _locateToken() { if (myTokenEndOffset == myBufferEndOffset) { myTokenType = null; myBufferIndex = myBufferEndOffset; return; } myBufferIndex = myTokenEndOffset; final char c = myBufferArray != null ? myBufferArray[myBufferIndex] : myBuffer.charAt(myBufferIndex); switch (c) { default: flexLocateToken(); break; case ' ': case '\t': case '\n': case '\r': case '\f': myTokenType = JavaTokenType.WHITE_SPACE; myTokenEndOffset = getWhitespaces(myBufferIndex + 1); break; case '/': { if (myBufferIndex + 1 >= myBufferEndOffset) { myTokenType = JavaTokenType.DIV; myTokenEndOffset = myBufferEndOffset; } else { final char nextChar = myBufferArray != null ? myBufferArray[myBufferIndex + 1] : myBuffer.charAt(myBufferIndex + 1); if (nextChar == '/') { myTokenType = JavaTokenType.END_OF_LINE_COMMENT; myTokenEndOffset = getLineTerminator(myBufferIndex + 2); } else if (nextChar == '*') { if (myBufferIndex + 2 >= myBufferEndOffset || (myBufferArray != null ? myBufferArray[myBufferIndex + 2] : myBuffer.charAt(myBufferIndex + 2)) != '*') { myTokenType = JavaTokenType.C_STYLE_COMMENT; myTokenEndOffset = getClosingComment(myBufferIndex + 2); } else { myTokenType = JavaTokenType.DOC_COMMENT; myTokenEndOffset = getDocClosingComment(myBufferIndex + 3); } } else if ((c > 127) && Character.isJavaIdentifierStart(c)) { myTokenEndOffset = getIdentifier(myBufferIndex + 1); } else { flexLocateToken(); } } break; } case '"': case '\'': myTokenType = c == '"' ? JavaTokenType.STRING_LITERAL : JavaTokenType.CHARACTER_LITERAL; myTokenEndOffset = getClosingParenthesys(myBufferIndex + 1, c); } if (myTokenEndOffset > myBufferEndOffset) { myTokenEndOffset = myBufferEndOffset; } } private int getWhitespaces(int pos) { if (pos >= myBufferEndOffset) return myBufferEndOffset; final CharSequence lBuffer = myBuffer; final char[] lBufferArray = myBufferArray; final boolean hasArray = lBufferArray != null; char c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); while (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') { pos++; if (pos == myBufferEndOffset) return pos; c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); } return pos; } private void flexLocateToken() { try { myFlexlexer.goTo(myBufferIndex); myTokenType = myFlexlexer.advance(); myTokenEndOffset = myFlexlexer.getTokenEnd(); } catch (IOException e) { // Can't be } } private int getClosingParenthesys(int offset, char c) { int pos = offset; final int lBufferEnd = myBufferEndOffset; if (pos >= lBufferEnd) return lBufferEnd; final CharSequence lBuffer = myBuffer; final char[] lBufferArray = myBufferArray; final boolean hasArray = lBufferArray != null; char cur = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); while (true) { while (cur != c && cur != '\n' && cur != '\r' && cur != '\\') { pos++; if (pos >= lBufferEnd) return lBufferEnd; cur = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); } if (cur == '\\') { pos++; if (pos >= lBufferEnd) return lBufferEnd; cur = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); if (cur == '\n' || cur == '\r') continue; pos++; if (pos >= lBufferEnd) return lBufferEnd; cur = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); } else if (cur == c) { break; } else { pos--; break; } } return pos + 1; } private int getDocClosingComment(int offset) { final int lBufferEnd = myBufferEndOffset; final CharSequence lBuffer = myBuffer; final char[] lBufferArray = myBufferArray; final boolean hasArray = lBufferArray != null; if (offset < lBufferEnd && (hasArray ? lBufferArray[offset] : lBuffer.charAt(offset)) == '/') { return offset + 1; } int pos = offset; while (pos < lBufferEnd - 1) { final char c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); if (c == '*' && (hasArray ? lBufferArray[pos + 1] : lBuffer.charAt(pos + 1)) == '/' ) { break; } pos++; } return pos + 2; } private int getClosingComment(int offset) { int pos = offset; final int lBufferEnd = myBufferEndOffset; final CharSequence lBuffer = myBuffer; final char[] lBufferArray = myBufferArray; final boolean hasArray = lBufferArray != null; while (pos < lBufferEnd - 1) { final char c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); if (c == '*' && (hasArray ? lBufferArray[pos + 1] : lBuffer.charAt(pos + 1)) == '/' ) { break; } pos++; } return pos + 2; } private int getLineTerminator(int offset) { int pos = offset; final int lBufferEnd = myBufferEndOffset; final CharSequence lBuffer = myBuffer; final char[] lBufferArray = myBufferArray; final boolean hasArray = lBufferArray != null; while (pos < lBufferEnd) { final char c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); if (c == '\r' || c == '\n') break; pos++; } return pos; } private int getIdentifier(int offset) { final CharSequence lBuffer = myBuffer; final char[] lBufferArray = myBufferArray; final boolean hasArray = lBufferArray != null; int hashCode = (hasArray ? lBufferArray[offset - 1] : lBuffer.charAt(offset - 1)) * 2; final int lBufferEnd = myBufferEndOffset; int pos = offset; if (pos < lBufferEnd) { char c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '$' || ((c > 127) && Character.isJavaIdentifierPart(c))) { pos++; hashCode += c; if (pos == lBufferEnd) break; c = hasArray ? lBufferArray[pos] : lBuffer.charAt(pos); } } if (myTable.contains(hashCode, lBuffer, lBufferArray, offset - 1)) { myTokenType = myTable.getTokenType(hashCode); } else { myTokenType = JavaTokenType.IDENTIFIER; } return pos; } public final char[] getBuffer() { return myBufferArray != null ? myBufferArray : CharArrayUtil.fromSequence(myBuffer); } @Override public CharSequence getBufferSequence() { return myBuffer; } @Override public final int getBufferEnd() { return myBufferEndOffset; } public static void main(String[] args) { try { BufferedReader reader = new BufferedReader(new FileReader(args[0])); String s; StringBuffer buf = new StringBuffer(); while ((s = reader.readLine()) != null) { buf.append(s).append("\n"); } JavaLexer lexer = new JavaLexer(true, true); lexer.start(buf, 0, buf.length()); while (lexer.getTokenType() != null) { lexer.advance(); } } catch (FileNotFoundException e) { e.printStackTrace(); //To change body of catch statement use Options | File Templates. } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use Options | File Templates. } } }