AsmPartitionScanner.java example

Explorer
cdt-master
/*******************************************************************************
 * Copyright (c) 2000, 2012 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     QNX Software System
 *     Anton Leherbauer (Wind River Systems)
 *******************************************************************************/
package org.eclipse.cdt.internal.ui.text.asm;


import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;

import org.eclipse.cdt.ui.text.ICPartitions;

import org.eclipse.cdt.internal.ui.text.BufferedDocumentScanner;


/**
 * This scanner recognizes
 * <ul>
 * <li>C multi line comments</li>
 * <li>Assembly line comments</li>
 * <li>C++ line comments</li>
 * <li>C string literals</li>
 * <li>Assembly character literals</li>
 * <li>C preprocessor directives</li>
 * </ul>
 * Default supported line start comment characters are ';', '!', '|' and '@'.
 * <p>
 * A simple heuristic is used to detect valid line end comment characters: If a
 * character known to be a potential line end comment character is used as a
 * line start comment char, this character is considered a valid line end
 * comment character, too. If e.g. a '@' is used to introduce a line start
 * comment it is also recognized as a line end comment introducer afterwards.
 * </p>
 */
public final class AsmPartitionScanner implements IPartitionTokenScanner, ICPartitions {

	/**
	 * Characters not applicable for line start comments.
	 */
	private static final String NON_LINE_COMMENT_CHARS = "\t #/_$.%()'\"{}"; //$NON-NLS-1$
	/**
	 * Characters known to be used as line end comments.
	 */
	private static final String KNOWN_LINE_END_COMMENT_CHARS = "#;@|!"; //$NON-NLS-1$
	
	// states
	private static final int CCODE= 0;
	private static final int SINGLE_LINE_COMMENT= 1;
	private static final int MULTI_LINE_COMMENT= 2;
	private static final int CHARACTER= 3;
	private static final int STRING= 4;
	private static final int PREPROCESSOR= 5;
	private static final int PREPROCESSOR_MULTI_LINE_COMMENT= 6;

	// beginning of prefixes and postfixes
	private static final int NONE= 0;
	private static final int BACKSLASH= 1; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT
	private static final int SLASH= 2; // prefix for SINGLE_LINE or MULTI_LINE
	private static final int SLASH_STAR= 3; // prefix for MULTI_LINE_COMMENT
	private static final int STAR= 4; // postfix for MULTI_LINE_COMMENT
	private static final int CARRIAGE_RETURN=5; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT
	private static final int BACKSLASH_CR= 6; // postfix for STRING, CHARACTER and SINGLE_LINE_COMMENT
	private static final int BACKSLASH_BACKSLASH= 7; // postfix for STRING, CHARACTER
	
	/** The scanner. */
	private final BufferedDocumentScanner fScanner= new BufferedDocumentScanner(1000);
	
	/** The offset of the last returned token. */
	private int fTokenOffset;
	/** The length of the last returned token. */
	private int fTokenLength;
	
	/** The state of the scanner. */
	private int fState;
	/** The last significant character read. */
	private int fLast;
	/** The amount of characters already read on first call to nextToken(). */
	private int fPrefixLength;
	/** Indicate whether current char is first non-whitespace char on the line*/
	private boolean fFirstCharOnLine= true;

	private final IToken[] fTokens= new IToken[] {
		new Token(null),
		new Token(C_SINGLE_LINE_COMMENT),
		new Token(C_MULTI_LINE_COMMENT),
		new Token(C_CHARACTER),
		new Token(C_STRING),
		new Token(C_PREPROCESSOR),
		new Token(C_MULTI_LINE_COMMENT)
	};

	private String fLineCommentChars= ""; //$NON-NLS-1$
	private String fLineEndCommentChars= ""; //$NON-NLS-1$
	private String fLineStartCommentChars= ";!|@"; //$NON-NLS-1$
	private boolean fDetectPreprocessor= true;
	
	public AsmPartitionScanner() {
	}

	/**
	 * Configure the set of default line comment characters.
	 * <p>
	 * These characters are considered line comment introducers at any position
	 * in the code.
	 * </p>
	 * 
	 * @param chars
	 */
	public void setLineCommentCharacters(String chars) {
		fLineCommentChars= chars != null ? chars : ""; //$NON-NLS-1$
	}

	/**
	 * Configure the set of default line start comment characters.
	 * <p>
	 * These characters are considered line comment introducers only as first
	 * non-whitespace character on the line.
	 * </p>
	 * 
	 * @param chars
	 */
	public void setLineStartCommentCharacters(String chars) {
		fLineStartCommentChars= chars != null ? chars : ""; //$NON-NLS-1$
	}

	/**
	 * Configure the set of default line end comment characters.
	 * <p>
	 * These characters are considered line comment introducers at the end of a
	 * instruction line.
	 * </p>
	 * 
	 * @param chars
	 */
	public void setLineEndCommentCharacters(String chars) {
		fLineEndCommentChars= chars != null ? chars : ""; //$NON-NLS-1$
	}

	/**
	 * Configure whether preprocessor directives should be detected.
	 * 
	 * @param detectPreprocessor
	 */
	public void setDetectPreprocessorDiretives(boolean detectPreprocessor) {
		fDetectPreprocessor= detectPreprocessor;
	}
	
	/*
	 * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
	 */
	@Override
	public IToken nextToken() {

		fTokenOffset += fTokenLength;
		fTokenLength= fPrefixLength;

		while (true) {
			final int ch= fScanner.read();
			
			final boolean isFirstCharOnLine= fFirstCharOnLine;
			if (fFirstCharOnLine && ch != ' ' && ch != '\t') {
				fFirstCharOnLine= false;
			}
			// characters
	 		switch (ch) {
	 		case ICharacterScanner.EOF:
		 		if (fTokenLength > 0) {
		 			fLast= NONE; // ignore last
		 			return preFix(fState, CCODE, NONE, 0);

		 		} else {
		 			fLast= NONE;
		 			fPrefixLength= 0;
					return Token.EOF;
		 		}

	 		case '\r':
	 			fFirstCharOnLine= true;
	 			if (fLast == BACKSLASH || fLast == BACKSLASH_BACKSLASH) {
	 				fLast= BACKSLASH_CR;
					fTokenLength++;
 					continue;
	 			} else if (fLast != CARRIAGE_RETURN) {
						fLast= CARRIAGE_RETURN;
						fTokenLength++;
	 					continue;
	 			} else {
	 				// fLast == CARRIAGE_RETURN
					switch (fState) {
					case SINGLE_LINE_COMMENT:
					case CHARACTER:
					case STRING:
					case PREPROCESSOR:
						if (fTokenLength > 0) {
							IToken token= fTokens[fState];

							fLast= CARRIAGE_RETURN;
							fPrefixLength= 1;

							fState= CCODE;
							return token;

						} else {
							consume();
							continue;
						}

					default:
						consume();
						continue;
					}
	 			}

	 		case '\\':
				switch (fState) {
				case CHARACTER:
				case STRING:
					fTokenLength++;
					fLast= fLast == BACKSLASH ? BACKSLASH_BACKSLASH : BACKSLASH;
					continue;
				default:
					fTokenLength++;
					fLast= BACKSLASH;
					continue;
	 			}

	 		case '\n':
	 			fFirstCharOnLine= true;
				switch (fState) {
				case SINGLE_LINE_COMMENT:
				case CHARACTER:
				case STRING:
				case PREPROCESSOR:
					// assert(fTokenLength > 0);
					// if last char was a backslash then we have an escaped line
					if (fLast != BACKSLASH && fLast != BACKSLASH_CR && fLast != BACKSLASH_BACKSLASH) {
						return postFix(fState);
					}
					consume();
					continue;
				default:
					consume();
					continue;
				}

	 		case ' ':
	 		case '\t':
	 			consume();
	 			continue;

			default:
				if (fLast == CARRIAGE_RETURN) {
					switch (fState) {
					case SINGLE_LINE_COMMENT:
					case CHARACTER:
					case STRING:
					case PREPROCESSOR:

						int last;
						int newState;
						switch (ch) {
						case '/':
							last= SLASH;
							newState= CCODE;
							break;

						case '*':
							last= STAR;
							newState= CCODE;
							break;

						case '\'':
							last= NONE;
							newState= CHARACTER;
							break;

						case '"':
							last= NONE;
							newState= STRING;
							break;

						case '\r':
							last= CARRIAGE_RETURN;
							newState= CCODE;
							break;

						case '\\':
							last= BACKSLASH;
							newState= CCODE;
							break;

						case '#':
							last= NONE;
							newState= PREPROCESSOR;
							break;

						default:
							last= NONE;
							newState= CCODE;
							break;
						}

						fLast= NONE; // ignore fLast
						return preFix(fState, newState, last, 1);

					case CCODE:
						if (fDetectPreprocessor && ch == '#' && isFirstCharOnLine) {
							fLast= NONE; // ignore fLast
							int column= fScanner.getColumn() - 1;
							fTokenLength -= column;
							if (fTokenLength > 0) {
								return preFix(fState, PREPROCESSOR, NONE, column + 1);
							} else {
								preFix(fState, PREPROCESSOR, NONE, column + 1);
								fTokenOffset += fTokenLength;
								fTokenLength= fPrefixLength;
								break;
							}
						}
						break;

					default:
						break;
					}
				}
			}

			// states
	 		switch (fState) {
	 		case CCODE:
 				if (fLast == NONE) {
 					boolean isLineComment= false;
 					if (ch == '#' && fDetectPreprocessor && isFirstCharOnLine) {
 						// don't consume as line comment
 					} else if (fLineCommentChars.indexOf(ch) >= 0 || (isFirstCharOnLine && fLineStartCommentChars.indexOf(ch) >= 0) || (!isFirstCharOnLine && fLineEndCommentChars.indexOf(ch) >= 0)) {
 						isLineComment= true;
 					} else if (isFirstCharOnLine && !Character.isLetterOrDigit((char)ch) && NON_LINE_COMMENT_CHARS.indexOf(ch) < 0) {
 						fLineStartCommentChars += (char)ch;
 						isLineComment= true;
	 				}
 					if (isLineComment) {
 						if (isFirstCharOnLine && fLineEndCommentChars.indexOf(ch) < 0 && KNOWN_LINE_END_COMMENT_CHARS.indexOf(ch) >= 0) {
 							fLineEndCommentChars += (char)ch;
 						}
						if (fTokenLength - getLastLength(fLast) > 0) {
 							return preFix(CCODE, SINGLE_LINE_COMMENT, NONE, 1);
 						} else {
 							preFix(CCODE, SINGLE_LINE_COMMENT, NONE, 1);
 							fTokenOffset += fTokenLength;
 							fTokenLength= fPrefixLength;
 						}
 						break;
 					}
	 			}

				switch (ch) {
				case '/':
					// supported by gnu preprocessor
					if (fLast == SLASH) {
						if (fTokenLength - getLastLength(fLast) > 0) {
							return preFix(CCODE, SINGLE_LINE_COMMENT, NONE, 2);
						} else {
							preFix(CCODE, SINGLE_LINE_COMMENT, NONE, 2);
							fTokenOffset += fTokenLength;
							fTokenLength= fPrefixLength;
							break;
						}
					} else {
						fTokenLength++;
						fLast= SLASH;
						break;
					}
	
				case '*':
					if (fLast == SLASH) {
						if (fTokenLength - getLastLength(fLast) > 0) {
							return preFix(CCODE, MULTI_LINE_COMMENT, SLASH_STAR, 2);
						} else {
							preFix(CCODE, MULTI_LINE_COMMENT, SLASH_STAR, 2);
							fTokenOffset += fTokenLength;
							fTokenLength= fPrefixLength;
							break;
						}
					} else {
						consume();
						break;
					}
					
				case '\'':
					fLast= NONE; // ignore fLast
					if (fTokenLength > 0) {
						return preFix(CCODE, CHARACTER, NONE, 1);
					} else {
						preFix(CCODE, CHARACTER, NONE, 1);
						fTokenOffset += fTokenLength;
						fTokenLength= fPrefixLength;
						break;
					}

				case '"':
					fLast= NONE; // ignore fLast
					if (fTokenLength > 0 ) {
						return preFix(CCODE, STRING, NONE, 1);
					} else {
						preFix(CCODE, STRING, NONE, 1);
						fTokenOffset += fTokenLength;
						fTokenLength= fPrefixLength;
						break;
					}
					
				case '#':
					if (fDetectPreprocessor && isFirstCharOnLine) {
						int column= fScanner.getColumn() - 1;
						fTokenLength -= column;
						if (fTokenLength > 0) {
							return preFix(fState, PREPROCESSOR, NONE, column + 1);
						} else {
							preFix(fState, PREPROCESSOR, NONE, column + 1);
							fTokenOffset += fTokenLength;
							fTokenLength= fPrefixLength;
							break;
						}
					}
					consume();
					break;
				default:
					consume();
					break;
				}
				break;

	 		case SINGLE_LINE_COMMENT:
				consume();
 				break;

	 		case PREPROCESSOR:
	 			switch (ch) {
				case '/':
					if (fLast == SLASH) {
						consume();
						break;
					} else {
						fTokenLength++;
						fLast= SLASH;
						break;
					}
	
				case '*':
					if (fLast == SLASH) {
						if (fTokenLength - getLastLength(fLast) > 0) {
							return preFix(fState, PREPROCESSOR_MULTI_LINE_COMMENT, SLASH_STAR, 2);
						} else {
							preFix(fState, PREPROCESSOR_MULTI_LINE_COMMENT, SLASH_STAR, 2);
							fTokenOffset += fTokenLength;
							fTokenLength= fPrefixLength;
							break;
						}
					} else {
						consume();
						break;
					}
		 		default:
					consume();
	 				break;
	 			}
	 			break;

	 		case PREPROCESSOR_MULTI_LINE_COMMENT:
				switch (ch) {
				case '*':
					fTokenLength++;
					fLast= STAR;
					break;
	
				case '/':
					if (fLast == STAR) {
						IToken token= postFix(fState);
						fState= PREPROCESSOR;
						return token;
					}
					consume();
					break;
	
				default:
					consume();
					break;
				}
				break;
				
	 		case MULTI_LINE_COMMENT:
				switch (ch) {
				case '*':
					fTokenLength++;
					fLast= STAR;
					break;
	
				case '/':
					if (fLast == STAR) {
						return postFix(MULTI_LINE_COMMENT);
					} else {
						consume();
						break;
					}
	
				default:
					consume();
					break;
				}
				break;
				
	 		case STRING:
	 			switch (ch) {
				case '\"':
	 				if (fLast != BACKSLASH) {
	 					return postFix(STRING);

		 			} else {
		 				consume();
		 				break;
		 			}
		 		
		 		default:
					consume();
	 				break;
	 			}
	 			break;
	
	 		case CHARACTER:
	 			switch (ch) {
				case '\\':
					fLast= (fLast == BACKSLASH) ? NONE : BACKSLASH;
					fTokenLength++;
	 				if (fLast != BACKSLASH) {
	 					return preFix(CHARACTER, CCODE, NONE, 1);
	 				}
					break;
	
	 			default:
	 				return postFix(CHARACTER);
	 			}
	 			break;
	 		}
		}
 	}

	private static final int getLastLength(int last) {
		switch (last) {
		default:
			return -1;

		case NONE:
			return 0;
			
		case CARRIAGE_RETURN:
		case BACKSLASH:
		case SLASH:
		case STAR:
			return 1;

		case SLASH_STAR:
		case BACKSLASH_CR:
		case BACKSLASH_BACKSLASH:
			return 2;

		}
	}

	private final void consume() {
		fTokenLength++;
		fLast= NONE;
	}
	
	private final IToken postFix(int state) {
		fTokenLength++;
		fLast= NONE;
		fState= CCODE;
		fPrefixLength= 0;
		return fTokens[state];
	}

	private final IToken preFix(int state, int newState, int last, int prefixLength) {
		fTokenLength -= getLastLength(fLast);
		fLast= last;
		fPrefixLength= prefixLength;
		IToken token= fTokens[state];
		fState= newState;
		return token;
	}

	private static int getState(String contentType) {

		if (contentType == null)
			return CCODE;

		else if (contentType.equals(C_SINGLE_LINE_COMMENT))
			return SINGLE_LINE_COMMENT;

		else if (contentType.equals(C_MULTI_LINE_COMMENT))
			return MULTI_LINE_COMMENT;

		else if (contentType.equals(C_STRING))
			return STRING;

		else if (contentType.equals(C_CHARACTER))
			return CHARACTER;

		else if (contentType.equals(C_PREPROCESSOR))
			return PREPROCESSOR;
			
		else
			return CCODE;
	}

	/*
	 * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String, int)
	 */
	@Override
	public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) {

		fScanner.setRange(document, offset, length);
		fTokenOffset= partitionOffset;
		fTokenLength= 0;
		fPrefixLength= offset - partitionOffset;
		fLast= NONE;
		
		if (offset == partitionOffset) {
			// restart at beginning of partition
			fState= CCODE;
		} else {
			fState= getState(contentType);
		}

		try {
			int column= fScanner.getColumn();
			fFirstCharOnLine= column == 0 || document.get(offset-column, column).trim().length() == 0;
		} catch (BadLocationException exc) {
			fFirstCharOnLine= true;
		}
	}

	/*
	 * @see ITokenScanner#setRange(IDocument, int, int)
	 */
	@Override
	public void setRange(IDocument document, int offset, int length) {

		fScanner.setRange(document, offset, length);
		fTokenOffset= offset;
		fTokenLength= 0;
		fPrefixLength= 0;
		fLast= NONE;
		fState= CCODE;

		try {
			int column= fScanner.getColumn();
			fFirstCharOnLine= column == 0 || document.get(offset-column, column).trim().length() == 0;
		} catch (BadLocationException exc) {
			fFirstCharOnLine= true;
		}
	}

	/*
	 * @see ITokenScanner#getTokenLength()
	 */
	@Override
	public int getTokenLength() {
		return fTokenLength;
	}

	/*
	 * @see ITokenScanner#getTokenOffset()
	 */
	@Override
	public int getTokenOffset() {
		return fTokenOffset;
	}

}