/*******************************************************************************
* Copyright Technophobia Ltd 2012
*
* This file is part of the Substeps Eclipse Plugin.
*
* The Substeps Eclipse Plugin is free software: you can redistribute it and/or modify
* it under the terms of the Eclipse Public License v1.0.
*
* The Substeps Eclipse Plugin is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Eclipse Public License for more details.
*
* You should have received a copy of the Eclipse Public License
* along with the Substeps Eclipse Plugin. If not, see <http://www.eclipse.org/legal/epl-v10.html>.
******************************************************************************/
package com.technophobia.substeps.document.text.rule;
import java.util.Arrays;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.SingleLineRule;
/**
* Extension of SingleLineRule which handles a line which may have a trailing comment,
* e.g.:
* <code>
* Given that some condition applies # condition applies
* </code>
* Any comment part, i.e. the # and all following text, is ignored when treating the line
* with this rule. The comment remains in the scanner to be handled by its own rule.
* <p>
* A # within any quoted string in the non-comment part of the line is not taken
* to indicate a trailing comment, so e.g.:
* <code>
* Given that the text contains "something # comment"
* </code>
* is not treated as if it had a trailing comment. Single or double quotes may be used,
* but they must be paired correctly.
* <p>
* A single instance of a quote character (" or ') may appear on the line without being escaped,
* e.g.
* <code>
* Given that I'm using an apostrophe
* </code>
*
* @author ariley
*
*/
public class SingleLineWithTrailingCommentRule extends SingleLineRule {

    /** Character sequence which opens a trailing comment and therefore ends the text matched by this rule. */
    public static final String TRAILING_COMMENT_START = "#";

    private static final char DEFAULT_ESCAPE_CHAR = '\\';

    private final char escapeCharacter;
    private final boolean escapeContinuesLine;

    /**
     * Creates a rule for the given starting sequence which, if detected, will
     * return the specified token. The backslash character '\' is used as the
     * escape character and line continuation is disabled. There is no need for
     * a terminating end of line character on the last line in a file.
     *
     * @param startSequence the pattern's start sequence
     * @param token the token to be returned on success
     */
    public SingleLineWithTrailingCommentRule(String startSequence,
            IToken token) {
        this(startSequence, token, DEFAULT_ESCAPE_CHAR);
    }

    /**
     * Creates a rule for the given starting sequence which, if detected, will
     * return the specified token. Any character which follows the given escape
     * character will be ignored. Line continuation is disabled. There is no
     * need for a terminating end of line character on the last line in a file.
     *
     * @param startSequence the pattern's start sequence
     * @param token the token to be returned on success
     * @param escapeCharacter the escape character
     */
    public SingleLineWithTrailingCommentRule(String startSequence,
            IToken token, char escapeCharacter) {
        this(startSequence, token, escapeCharacter, false);
    }

    /**
     * Creates a rule for the given starting sequence which, if detected, will
     * return the specified token. Any character which follows the given escape
     * character will be ignored. There is no need for a terminating end of line
     * character on the last line in a file.
     *
     * @param startSequence the pattern's start sequence
     * @param token the token to be returned on success
     * @param escapeCharacter the escape character
     * @param escapeContinuesLine indicates whether the specified escape character is used for line
     *            continuation, so that an end of line immediately after the escape character does
     *            not terminate the line
     */
    public SingleLineWithTrailingCommentRule(String startSequence,
            IToken token, char escapeCharacter, boolean escapeContinuesLine) {
        // The comment opener is the end sequence; 'true' makes the rule also break on EOF.
        super(startSequence, TRAILING_COMMENT_START, token, escapeCharacter, true, escapeContinuesLine);
        this.escapeCharacter = escapeCharacter;
        this.escapeContinuesLine = escapeContinuesLine;
    }

    /**
     * Runs the inherited end sequence detection through a {@link QuoteAwareScanner}, so that a
     * comment opener inside a quoted string or after the escape character is not seen. If the
     * text was ended by the comment opener (rather than by end of line or end of file), the
     * opener is pushed back into the scanner so that it is not part of the text matched by
     * this rule.
     *
     * @param scanner the scanner positioned after the start sequence
     * @return the value returned by the inherited end sequence detection
     */
    @Override
    protected boolean endSequenceDetected(ICharacterScanner scanner) {
        final boolean detected = super.endSequenceDetected(new QuoteAwareScanner(scanner));
        if (detected) {
            // Peek at the last character read to find out whether the text was ended by the
            // trailing comment opener or by EOL/EOF.
            scanner.unread();
            final int last = scanner.read();
            final int openerLength = TRAILING_COMMENT_START.length();
            if (last == TRAILING_COMMENT_START.charAt(openerLength - 1)) {
                // Put the comment opener back
                for (int i = 0; i < openerLength; i++) {
                    scanner.unread();
                }
            }
        }
        return detected;
    }

    /** Tells whether the given character opens or closes a quoted string (" or '). */
    private static boolean isQuote(int c) {
        return c == '"' || c == '\'';
    }

    /** Tells whether the given character is an end of line character (line feed or carriage return). */
    private static boolean isEndOfLine(int c) {
        return c == '\n' || c == '\r';
    }

    /**
     * Wrapper round an ICharacterScanner which detects and ignores escaped characters and quoted
     * strings within the text. {@link #unread()} rewinds everything consumed by the matching
     * {@link #read()}, however many underlying characters that was.
     */
    private final class QuoteAwareScanner implements ICharacterScanner {

        private final ICharacterScanner delegate;

        /** Number of underlying characters consumed by each call to read() which has not been unread yet. */
        private int[] consumedPerRead = new int[32];

        /** Number of valid entries in consumedPerRead. */
        private int pendingReads;

        private QuoteAwareScanner(ICharacterScanner scanner) {
            this.delegate = scanner;
        }

        @Override
        public char[][] getLegalLineDelimiters() {
            return delegate.getLegalLineDelimiters();
        }

        @Override
        public int getColumn() {
            return delegate.getColumn();
        }

        /**
         * Get the next character to process.
         * <p>
         * Escaped characters (the escape character and the character after it) and complete
         * quoted strings are skipped, repeatedly, until a character is found which is neither.
         * That character is returned. Quotes must be paired exactly, i.e. a quoted string
         * which starts with " must be terminated with another ", " will not be matched with '.
         * </p><p>
         * If no closing quote is found before the end of the line or EOF, the quote character
         * itself is returned and scanning resumes with the character after it. Hence a single
         * quote character without a matching quote character is treated as a normal character.
         * </p><p>
         * If EOF immediately follows the escape character, EOF is returned.
         * </p>
         *
         * @return the next significant character, or {@link ICharacterScanner#EOF}
         */
        @Override
        public int read() {
            int consumed = 0;
            int c;
            while (true) {
                c = delegate.read();
                consumed++;
                if (c == escapeCharacter) {
                    // Skip the escaped character, then examine whatever follows it
                    final int escaped = delegate.read();
                    consumed++;
                    if (escaped == ICharacterScanner.EOF) {
                        c = ICharacterScanner.EOF;
                        break;
                    }
                    consumed += skipLineFeedAfterEscapedCarriageReturn(escaped);
                    continue;
                }
                if (isQuote(c)) {
                    final int skipped = skipQuotedString(c);
                    if (skipped > 0) {
                        // Complete quoted string skipped, examine whatever follows it
                        consumed += skipped;
                        continue;
                    }
                    // No closing quote: the quote is returned as a normal character
                }
                break;
            }
            remember(consumed);
            return c;
        }

        /**
         * Undoes the most recent call to {@link #read()} which has not been undone yet,
         * rewinding the underlying scanner by every character that call consumed.
         */
        @Override
        public void unread() {
            int count = 1;
            if (pendingReads > 0) {
                count = consumedPerRead[--pendingReads];
            }
            unread(count);
        }

        /**
         * Skips the remainder of a quoted string whose opening quote has just been read.
         * <p>
         * Searching stops at EOF, at an un-escaped end of line character, or at an escaped end
         * of line character when escapeContinuesLine is false. In that case everything read
         * by this method is unread, leaving the scanner just after the opening quote.
         * </p>
         *
         * @param openQuote the quote character which opened the string
         * @return the number of characters consumed, including the closing quote, or 0 if
         *         no closing quote was found
         */
        private int skipQuotedString(int openQuote) {
            int consumed = 0;
            while (true) {
                int c = delegate.read();
                consumed++;
                if (c == escapeCharacter) {
                    // The escaped character can never close the quoted string
                    c = delegate.read();
                    consumed++;
                    if (c != ICharacterScanner.EOF && (escapeContinuesLine || !isEndOfLine(c))) {
                        consumed += skipLineFeedAfterEscapedCarriageReturn(c);
                        continue;
                    }
                } else if (c == openQuote) {
                    // Matched the opening quote
                    return consumed;
                } else if (c != ICharacterScanner.EOF && !isEndOfLine(c)) {
                    continue;
                }
                // EOF or line end reached, quoted string not closed so push all back
                unread(consumed);
                return 0;
            }
        }

        /**
         * When the escape character continues lines and the escaped character is a carriage
         * return, also consumes a directly following line feed, so that an escaped CR LF pair
         * is skipped as a single end of line.
         *
         * @param escaped the character which followed the escape character
         * @return 1 if a line feed was consumed, otherwise 0
         */
        private int skipLineFeedAfterEscapedCarriageReturn(int escaped) {
            if (escapeContinuesLine && escaped == '\r') {
                if (delegate.read() == '\n') {
                    return 1;
                }
                delegate.unread();
            }
            return 0;
        }

        /** Records how many underlying characters the latest read() consumed. */
        private void remember(int consumed) {
            if (pendingReads == consumedPerRead.length) {
                consumedPerRead = Arrays.copyOf(consumedPerRead, pendingReads * 2);
            }
            consumedPerRead[pendingReads++] = consumed;
        }

        /** Rewinds the underlying scanner by the given number of characters. */
        private void unread(int charsRead) {
            for (int i = 0; i < charsRead; i++) {
                delegate.unread();
            }
        }
    }
}