/******************************************************************************* * Copyright (c) 2000, 2011 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation * QNX Software System * Anton Leherbauer (Wind River Systems) * Andrew Ferguson (Symbian) *******************************************************************************/ package org.eclipse.cdt.internal.ui.text; import org.eclipse.jface.text.BadLocationException; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.rules.ICharacterScanner; import org.eclipse.jface.text.rules.IPartitionTokenScanner; import org.eclipse.jface.text.rules.IToken; import org.eclipse.jface.text.rules.Token; import org.eclipse.cdt.ui.text.ICPartitions; import org.eclipse.cdt.ui.text.doctools.IDocCommentOwner; /** * This scanner recognizes the C multi line comments, C single line comments, * C strings, C characters and C preprocessor directives. */ public final class FastCPartitionScanner implements IPartitionTokenScanner, ICPartitions { // states private static final int CCODE= 0; private static final int SINGLE_LINE_COMMENT= 1; private static final int MULTI_LINE_COMMENT= 2; private static final int CHARACTER= 3; private static final int STRING= 4; private static final int RAW_STRING= 5; private static final int PREPROCESSOR= 6; private static final int PREPROCESSOR_MULTI_LINE_COMMENT= 7; private static final int PREPROCESSOR_STRING= 8; private static final int SINGLE_LINE_DOC_COMMENT= 9; private static final int MULTI_LINE_DOC_COMMENT= 10; /** * Sub state for raw strings. */ private enum RawStringState { OPEN_DELIMITER, CONTENT, CLOSE_DELIMITER } // beginning of prefixes and postfixes private static final int NONE= 0; private static final int BACKSLASH= 1; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT private static final int SLASH= 2; // prefix for SINGLE_LINE or MULTI_LINE private static final int SLASH_STAR= 3; // prefix for MULTI_LINE_COMMENT private static final int STAR= 4; // postfix for MULTI_LINE_COMMENT private static final int CARRIAGE_RETURN=5; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT private static final int BACKSLASH_CR= 6; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT private static final int BACKSLASH_BACKSLASH= 7; // postfix for STRING, CHARACTER private static final int RAW_STRING_R= 8; // prefix for RAW_STRING private static final int IDENT= 9; /** The scanner. */ private final BufferedDocumentScanner fScanner= new BufferedDocumentScanner(1000); // faster implementation /** The offset of the last returned token. */ private int fTokenOffset; /** The length of the last returned token. */ private int fTokenLength; /** The state of the scanner. */ private int fState; /** The last significant characters read. */ private int fLast; /** The amount of characters already read on first call to nextToken(). */ private int fPrefixLength; /** Indicate whether current char is first non-whitespace char on the line*/ private boolean fFirstCharOnLine= true; /** An optional (possibly null) comment owner for detecting documentation-comments */ private final IDocCommentOwner fOwner; private IDocument fDocument; private final IToken[] fTokens= new IToken[] { new Token(null), new Token(C_SINGLE_LINE_COMMENT), new Token(C_MULTI_LINE_COMMENT), new Token(C_CHARACTER), new Token(C_STRING), new Token(C_STRING), new Token(C_PREPROCESSOR), new Token(C_MULTI_LINE_COMMENT), new Token(C_PREPROCESSOR), new Token(C_SINGLE_LINE_DOC_COMMENT), new Token(C_MULTI_LINE_DOC_COMMENT) }; private final StringBuilder fRawStringDelimiter = new StringBuilder(12); public FastCPartitionScanner(IDocCommentOwner owner) { fOwner = owner; } public FastCPartitionScanner() { this(null); } /* * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken() */ public IToken nextToken() { fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; RawStringState rawStringState = RawStringState.OPEN_DELIMITER; int rawStringDelimiterIdx = 0; while (true) { final int ch= fScanner.read(); final boolean isFirstCharOnLine= fFirstCharOnLine; if (isFirstCharOnLine && ch != ' ' && ch != '\t') { fFirstCharOnLine= false; } // characters switch (ch) { case ICharacterScanner.EOF: fLast= NONE; // ignore last if (fTokenLength > 0) { return preFix(fState, CCODE, NONE, 0); } else { fPrefixLength= 0; return Token.EOF; } case '\r': fFirstCharOnLine= true; if (fLast == BACKSLASH || fLast == BACKSLASH_BACKSLASH) { fLast= BACKSLASH_CR; fTokenLength++; continue; } else if (fLast != CARRIAGE_RETURN) { fLast= CARRIAGE_RETURN; fTokenLength++; continue; } else { // fLast == CARRIAGE_RETURN switch (fState) { case SINGLE_LINE_COMMENT: case CHARACTER: case STRING: case PREPROCESSOR: if (fTokenLength > 0) { IToken token= fTokens[fState]; fLast= CARRIAGE_RETURN; fPrefixLength= 1; fState= CCODE; return token; } else { consume(); continue; } default: consume(); continue; } } case '\\': switch (fState) { case CHARACTER: case STRING: case PREPROCESSOR_STRING: fTokenLength++; fLast= fLast == BACKSLASH ? BACKSLASH_BACKSLASH : BACKSLASH; continue; default: fTokenLength++; fLast= BACKSLASH; continue; } case '\n': fFirstCharOnLine= true; switch (fState) { case SINGLE_LINE_COMMENT: case CHARACTER: case STRING: case PREPROCESSOR: case PREPROCESSOR_STRING: // assert(fTokenLength > 0); // if last char was a backslash then we have spliced line if (fLast != BACKSLASH && fLast != BACKSLASH_CR && fLast != BACKSLASH_BACKSLASH) { return postFix(fState); } consume(); continue; default: consume(); continue; } default: if (fLast == CARRIAGE_RETURN) { switch (fState) { case SINGLE_LINE_COMMENT: case CHARACTER: case STRING: case PREPROCESSOR: case PREPROCESSOR_STRING: int last; int newState; switch (ch) { case '/': last= SLASH; newState= CCODE; break; case '*': last= STAR; newState= CCODE; break; case '\'': last= NONE; newState= CHARACTER; break; case '"': last= NONE; newState= STRING; break; case '\r': last= CARRIAGE_RETURN; newState= CCODE; break; case '\\': last= BACKSLASH; newState= CCODE; break; case '#': last= NONE; newState= PREPROCESSOR; break; default: last= NONE; newState= CCODE; break; } fLast= NONE; // ignore fLast return preFix(fState, newState, last, 1); case CCODE: if (ch == '#' && isFirstCharOnLine) { fLast= NONE; // ignore fLast int column= fScanner.getColumn() - 1; fTokenLength -= column; if (fTokenLength > 0) { return preFix(fState, PREPROCESSOR, NONE, column + 1); } else { preFix(fState, PREPROCESSOR, NONE, column + 1); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } } break; default: break; } } } // states switch (fState) { case CCODE: switch (ch) { case '/': if (fLast == SLASH) { if (fTokenLength - getLastLength(fLast) > 0) { return preFix(CCODE, SINGLE_LINE_COMMENT, NONE, 2); } else { preFix(CCODE, SINGLE_LINE_COMMENT, NONE, 2); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } } else { fTokenLength++; fLast= SLASH; break; } case '*': if (fLast == SLASH) { if (fTokenLength - getLastLength(fLast) > 0) { return preFix(CCODE, MULTI_LINE_COMMENT, SLASH_STAR, 2); } else { preFix(CCODE, MULTI_LINE_COMMENT, SLASH_STAR, 2); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } } else { consume(); break; } case '\'': fLast= NONE; // ignore fLast if (fTokenLength > 0) { return preFix(CCODE, CHARACTER, NONE, 1); } else { preFix(CCODE, CHARACTER, NONE, 1); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } case 'u': case 'U': case 'L': if (fLast != IDENT) { fLast = NONE; } fTokenLength++; continue; case 'R': if (fLast == RAW_STRING_R) { fLast = IDENT; } else if (fLast != IDENT) { fLast = RAW_STRING_R; } fTokenLength++; continue; case '"': int newState = STRING; if (fLast == RAW_STRING_R) { newState = RAW_STRING; rawStringState = RawStringState.OPEN_DELIMITER; fRawStringDelimiter.setLength(0); } fLast= NONE; // ignore fLast if (fTokenLength > 0 ) { return preFix(CCODE, newState, NONE, 1); } else { preFix(CCODE, newState, NONE, 1); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } case '#': if (isFirstCharOnLine) { int column= fScanner.getColumn() - 1; fTokenLength -= column; if (fTokenLength > 0) { return preFix(fState, PREPROCESSOR, NONE, column + 1); } else { preFix(fState, PREPROCESSOR, NONE, column + 1); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } } consume(); break; default: if ('a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch =='_') { fLast = IDENT; fTokenOffset++; } else if ('0' <= ch && ch <= '9' && fLast == IDENT) { fTokenOffset++; } else { consume(); } break; } break; case SINGLE_LINE_COMMENT: consume(); break; case PREPROCESSOR: switch (ch) { case '/': if (fLast == SLASH) { if (fTokenLength - getLastLength(fLast) > 0) { return preFix(fState, SINGLE_LINE_COMMENT, SLASH, 2); } else { preFix(fState, SINGLE_LINE_COMMENT, SLASH, 2); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } } else { fTokenLength++; fLast= SLASH; } break; case '*': if (fLast == SLASH) { if (fTokenLength - getLastLength(fLast) > 0) { return preFix(fState, PREPROCESSOR_MULTI_LINE_COMMENT, SLASH_STAR, 2); } else { preFix(fState, PREPROCESSOR_MULTI_LINE_COMMENT, SLASH_STAR, 2); fTokenOffset += fTokenLength; fTokenLength= fPrefixLength; break; } } consume(); break; case '"': if (fLast != BACKSLASH) { fState= PREPROCESSOR_STRING; } consume(); break; default: consume(); break; } break; case PREPROCESSOR_STRING: switch (ch) { case '"': if (fLast != BACKSLASH) { fState= PREPROCESSOR; } consume(); break; default: consume(); break; } break; case PREPROCESSOR_MULTI_LINE_COMMENT: switch (ch) { case '*': fTokenLength++; fLast= STAR; break; case '/': if (fLast == STAR) { IToken token= postFix(fState); fState= PREPROCESSOR; return token; } consume(); break; default: consume(); break; } break; case MULTI_LINE_COMMENT: switch (ch) { case '*': fTokenLength++; fLast= STAR; break; case '/': if (fLast == STAR) { return postFix(MULTI_LINE_COMMENT); } else { consume(); break; } default: consume(); break; } break; case STRING: switch (ch) { case '\"': if (fLast != BACKSLASH) { return postFix(STRING); } else { consume(); break; } default: consume(); break; } break; case RAW_STRING: switch (rawStringState) { case OPEN_DELIMITER: if (ch == '(') { rawStringState = RawStringState.CONTENT; } else if (ch == '"') { return postFix(RAW_STRING); } else if (ch != ' ' && ch != '\\' && ch != ')' && fRawStringDelimiter.length() < 12) { fRawStringDelimiter.append((char) ch); } else { fState = STRING; } consume(); break; case CONTENT: if (ch == ')') { rawStringState = RawStringState.CLOSE_DELIMITER; rawStringDelimiterIdx = 0; } consume(); break; case CLOSE_DELIMITER: if (ch == ')') { rawStringDelimiterIdx = 0; } else if (rawStringDelimiterIdx < fRawStringDelimiter.length()) { if (fRawStringDelimiter.charAt(rawStringDelimiterIdx) != ch) { rawStringState = RawStringState.CONTENT; } else { ++rawStringDelimiterIdx; } } else if (ch == '"') { return postFix(RAW_STRING); } else { rawStringState = RawStringState.CONTENT; } consume(); break; } break; case CHARACTER: switch (ch) { case '\'': if (fLast != BACKSLASH) { return postFix(CHARACTER); } else { consume(); break; } default: consume(); break; } break; } } } private static final int getLastLength(int last) { switch (last) { default: return -1; case NONE: case IDENT: return 0; case CARRIAGE_RETURN: case BACKSLASH: case SLASH: case STAR: case RAW_STRING_R: return 1; case SLASH_STAR: case BACKSLASH_CR: case BACKSLASH_BACKSLASH: return 2; } } private final void consume() { fTokenLength++; fLast= NONE; } private final IToken postFix(int state) { return postFix(state, CCODE); } private final IToken postFix(int state, int newState) { fTokenLength++; fLast= NONE; fState= newState; fPrefixLength= 0; return fTokens[interceptTokenState(state)]; } private final IToken preFix(int state, int newState, int last, int prefixLength) { fTokenLength -= getLastLength(fLast); fLast= last; fPrefixLength= prefixLength; fState= newState; return fTokens[interceptTokenState(state)]; } private static int getState(String contentType) { if (contentType == null) return CCODE; else if (contentType.equals(C_SINGLE_LINE_COMMENT)) return SINGLE_LINE_COMMENT; else if (contentType.equals(C_MULTI_LINE_COMMENT)) return MULTI_LINE_COMMENT; else if (contentType.equals(C_STRING)) return STRING; else if (contentType.equals(C_CHARACTER)) return CHARACTER; else if (contentType.equals(C_PREPROCESSOR)) return PREPROCESSOR; else if (contentType.equals(C_SINGLE_LINE_DOC_COMMENT)) return SINGLE_LINE_COMMENT; // intentionally non-doc state: the state machine is doc-comment unaware else if (contentType.equals(C_MULTI_LINE_DOC_COMMENT)) return MULTI_LINE_COMMENT; // intentionally non-doc state: the state machine is doc-comment unaware else return CCODE; } /* * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String, int) */ public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) { fDocument= document; fScanner.setRange(document, offset, length); fTokenOffset= partitionOffset; fTokenLength= 0; fPrefixLength= offset - partitionOffset; fLast= NONE; fState= getState(contentType); if (fState == STRING) { // raw string is a special case: need to restart from partition offset try { if (partitionOffset > 0 && fDocument.getChar(partitionOffset - 1) == 'R') { fState = RAW_STRING; int endOffset = offset + length; offset = partitionOffset + 1; length = endOffset - offset; fScanner.setRange(document, offset, length); fPrefixLength = offset - partitionOffset; fRawStringDelimiter.setLength(0); } } catch (BadLocationException exc) { // cannot happen } } if (offset == partitionOffset) { // restart at beginning of partition fState= CCODE; } try { int column= fScanner.getColumn(); fFirstCharOnLine= column == 0 || document.get(offset-column, column).trim().length() == 0; } catch (BadLocationException exc) { fFirstCharOnLine= true; } } /* * @see ITokenScanner#setRange(IDocument, int, int) */ public void setRange(IDocument document, int offset, int length) { fDocument= document; fScanner.setRange(document, offset, length); fTokenOffset= offset; fTokenLength= 0; fPrefixLength= 0; fLast= NONE; fState= CCODE; try { int column= fScanner.getColumn(); fFirstCharOnLine= column == 0 || document.get(offset-column, column).trim().length() == 0; } catch (BadLocationException exc) { fFirstCharOnLine= true; } } /* * @see ITokenScanner#getTokenLength() */ public int getTokenLength() { return fTokenLength; } /* * @see ITokenScanner#getTokenOffset() */ public int getTokenOffset() { return fTokenOffset; } private int interceptTokenState(int proposedTokenState) { if(fOwner!=null) { switch(proposedTokenState) { case MULTI_LINE_COMMENT: if(fOwner.getMultilineConfiguration().isDocumentationComment(fDocument, fTokenOffset, fTokenLength)) return MULTI_LINE_DOC_COMMENT; break; case SINGLE_LINE_COMMENT: if(fOwner.getSinglelineConfiguration().isDocumentationComment(fDocument, fTokenOffset, fTokenLength)) return SINGLE_LINE_DOC_COMMENT; break; } } return proposedTokenState; } /** * @return the DocCommentOwner associated with this partition scanner, or null * if there is no owner. * @since 5.0 */ public IDocCommentOwner getDocCommentOwner() { return fOwner; } }