/*=============================================================================#
# Copyright (c) 2005-2016 Stephan Wahlbrink (WalWare.de) and others.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Eclipse Public License v1.0
# which accompanies this distribution, and is available at
# http://www.eclipse.org/legal/epl-v10.html
#
# Contributors:
# Stephan Wahlbrink - initial API and implementation
#=============================================================================*/
package de.walware.ecommons.text;
import org.apache.commons.collections.primitives.ArrayIntList;
import org.apache.commons.collections.primitives.IntList;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IRegion;
import org.eclipse.jface.text.ITypedRegion;
import org.eclipse.jface.text.Region;
import org.eclipse.jface.text.TextUtilities;
import org.eclipse.jface.text.TypedRegion;
import de.walware.ecommons.text.core.IPartitionConstraint;
import de.walware.ecommons.text.core.sections.IDocContentSections;
/**
* Utility methods for heuristic based R manipulations in an incomplete source file.
* <p>
* An instance holds some internal position in the document and is therefore not threadsafe.</p>
*
* @since 0.2
*/
public class BasicHeuristicTokenScanner implements ITokenScanner {
protected static final IPartitionConstraint ALL_PARTITIONS_CONSTRAINT = new IPartitionConstraint() {
@Override
public boolean matches(final String partitionType) {
return true;
}
};
/**
* Specifies the stop condition, upon which the <code>scan...</code> methods will decide whether
* to keep scanning or not. This interface may implemented by clients.
*/
protected abstract class StopCondition {
/**
* Instructs the scanner to return the current position.
*
* @return <code>true</code> if the stop condition is met.
*/
public abstract boolean stop();
/**
* Asks the condition to return the next position to query. The default
* is to return the next/previous position.
*
* @return the next position to scan
*/
public int nextPositionForward() {
return fPos + 1;
}
public int nextPositionBackward() {
return fPos - 1;
}
}
/**
* Stops upon a character in the default partition that matches the given character list.
*/
protected abstract class PartitionBasedCondition extends StopCondition {
private ITypedRegion fCurrentPartition;
private boolean fCurrentPartitionMatched;
private int fCurrentPartitionStart;
private int fCurrentPartitionEnd;
public PartitionBasedCondition() {
fCurrentPartitionMatched = false;
}
@Override
public boolean stop() {
if (fCurrentPartitionMatched && fCurrentPartitionStart <= fPos && fPos < fCurrentPartitionEnd) {
return matchesChar();
}
fCurrentPartition = getPartition();
fCurrentPartitionStart = fCurrentPartition.getOffset();
fCurrentPartitionEnd = fCurrentPartitionStart+fCurrentPartition.getLength();
if (fPartitionConstraint.matches(fCurrentPartition.getType())) {
fCurrentPartitionMatched = true;
return matchesChar();
}
else {
fCurrentPartitionMatched = false;
return false;
}
}
protected abstract boolean matchesChar();
@Override
public int nextPositionForward() {
if (fCurrentPartitionMatched) {
return fPos + 1;
}
if (fPos < fCurrentPartitionEnd) {
return fCurrentPartitionEnd;
}
return fPos + 1;
}
@Override
public int nextPositionBackward() {
if (fCurrentPartitionMatched) {
return fPos - 1;
}
if (fPos >= fCurrentPartitionStart) {
return fCurrentPartitionStart - 1;
}
return fPos - 1;
}
}
protected class SingleCharacterMatchCondition extends PartitionBasedCondition {
protected final int fSingleChar;
/**
* Creates a new instance.
* @param ch the single character to match
*/
public SingleCharacterMatchCondition(final char ch) {
fSingleChar = ch;
}
@Override
protected boolean matchesChar() {
return (fSingleChar == fChar);
}
}
protected class CharacterMatchCondition extends PartitionBasedCondition {
protected final char[] fChars;
/**
* Creates a new instance.
* @param chars the chars to match.
*/
public CharacterMatchCondition(final char[] chars) {
assert (chars != null);
fChars = chars;
}
@Override
protected boolean matchesChar() {
for (int i = 0; i < fChars.length; i++) {
if (fChars[i] == fChar) {
return true;
}
}
return false;
}
}
protected class ExtCharacterMatchCondition extends CharacterMatchCondition {
private final char fEscapeChar;
private int fLastEscapeOffset = -100;
ExtCharacterMatchCondition(final char[] chars, final char escapeChar) {
super(chars);
fEscapeChar = escapeChar;
}
@Override
protected boolean matchesChar() {
if (fPos == fLastEscapeOffset+1) {
return false;
}
if (fChar == fEscapeChar) {
fLastEscapeOffset = fPos;
return false;
}
return super.matchesChar();
}
}
protected class StringMatchCondition extends PartitionBasedCondition {
protected final String fString;
private final char fEscapeChar;
private int fLastEscapeOffset = -100;
/**
* Creates a new instance.
* @param ch the string to match
*/
public StringMatchCondition(final String s, final char escapeChar) {
fString = s;
fEscapeChar = escapeChar;
}
@Override
protected boolean matchesChar() {
if (fPos == fLastEscapeOffset+1) {
return false;
}
if (fString.charAt(0) == fChar) {
try {
if (fString.regionMatches(1, fDocument.get(fPos+1, fString.length()-1),
0, fString.length()-1 )) {
return true;
}
} catch (final BadLocationException e) {}
}
if (fEscapeChar == fChar) {
fLastEscapeOffset = fPos;
return false;
}
return false;
}
}
/** The partitioning being used for scanning. */
private final String partitioning;
private final IPartitionConstraint defaultPartitionConstraint;
/** The document being scanned. */
protected IDocument fDocument;
/** The partition to scan in. */
private IPartitionConstraint fPartitionConstraint;
/* internal scan state */
/** the most recently read character. */
protected char fChar;
/** the most recently read position. */
protected int fPos;
/** the most recently read line of position (only if used). */
private int fLine;
private StopCondition fNonWSCondition;
private StopCondition fNonWSorLRCondition;
public BasicHeuristicTokenScanner(final IDocContentSections documentContentInfo,
final IPartitionConstraint defaultContentConstraint) {
this.partitioning= documentContentInfo.getPartitioning();
this.defaultPartitionConstraint= defaultContentConstraint;
}
public final String getDocumentPartitioning() {
return this.partitioning;
}
protected final IPartitionConstraint getDefaultPartitionConstraint() {
return this.defaultPartitionConstraint;
}
protected final IPartitionConstraint getPartitionConstraint() {
return fPartitionConstraint;
}
public final char getChar() {
return fChar;
}
protected boolean isWhitespace() {
return (Character.getType(fChar) == Character.SPACE_SEPARATOR || fChar == '\t');
}
protected final StopCondition getAnyNonWSCondition() {
if (fNonWSCondition == null) {
fNonWSCondition = new StopCondition() {
@Override
public boolean stop() {
return (!isWhitespace());
}
};
}
return fNonWSCondition;
}
protected final StopCondition getAnyNonWSorLRCondition() {
if (fNonWSorLRCondition == null) {
fNonWSorLRCondition = new StopCondition() {
@Override
public boolean stop() {
return (!isWhitespace() && fChar != '\r' && fChar != '\n');
}
};
}
return fNonWSorLRCondition;
}
protected final StopCondition getNonWSCondition() {
if (fNonWSCondition == null) {
fNonWSCondition = new PartitionBasedCondition() {
@Override
protected boolean matchesChar() {
return (!isWhitespace());
}
};
}
return fNonWSCondition;
}
protected final StopCondition getNonWSorLRCondition() {
if (fNonWSorLRCondition == null) {
fNonWSorLRCondition = new PartitionBasedCondition() {
@Override
protected boolean matchesChar() {
return (!isWhitespace() && fChar != '\r' && fChar != '\n');
}
};
}
return fNonWSorLRCondition;
}
/**
* Configures the scanner for the given document
* and the given partition type as partition constraint
*
* @param document the document to scan
* @param partition the partition to scan in
*/
@Override
public void configure(final IDocument document, final String partitionType) {
assert (document != null && partitionType != null);
fDocument = document;
fPartitionConstraint = new IPartitionConstraint() {
@Override
public boolean matches(final String partitionTypeToTest) {
return partitionType == partitionTypeToTest;
}
};
}
/**
* Configures the scanner for the given document
* and no partition constraint
*
* @param document the document to scan
*/
public void configure(final IDocument document) {
assert (document != null);
fDocument = document;
fPartitionConstraint = ALL_PARTITIONS_CONSTRAINT;
}
/**
* Configures the scanner for the given document
* and the partition constraint for default partitions
*
* @param document the document to scan
*/
public void configureDefaultParitions(final IDocument document) {
assert (document != null);
fDocument = document;
fPartitionConstraint = getDefaultPartitionConstraint();
}
/**
* Configures the scanner for the given document
* and the given partition constraint
*
* @param document the document to scan
* @param partition the partition to scan in
*/
public void configure(final IDocument document, final IPartitionConstraint partitionConstraint) {
assert (document != null && partitionConstraint != null);
fDocument = document;
fPartitionConstraint = partitionConstraint;
}
// public void configure(IDocument document, int offset) throws BadLocationException {
// configure(document, TextUtilities.getContentType(
// document, fPartitioning, offset, false));
// }
/**
* Returns the most recent internal scan position.
*
* @return the most recent internal scan position.
*/
public int getPosition() {
return fPos;
}
protected StopCondition createFindPeerStopCondition(final int start, final char[] pair, final char escapeChar) {
return (escapeChar == (char) 0) ?
new CharacterMatchCondition(pair) : new ExtCharacterMatchCondition(pair, escapeChar);
}
protected int createForwardBound(final int start) throws BadLocationException {
return fDocument.getLength();
}
protected int createBackwardBound(final int start) throws BadLocationException {
return -1;
}
@Override
public int findClosingPeer(final int start, final char[] pair) {
return findClosingPeer(start, pair, (char) 0);
}
@Override
public int findClosingPeer(int start, final char[] pair, final char escapeChar) {
Assert.isNotNull(fDocument);
Assert.isTrue(start >= 0);
try {
final StopCondition cond = createFindPeerStopCondition(start, pair, escapeChar);
final int bound = createForwardBound(start);
int depth = 1;
start -= 1;
while (true) {
start = scanForward(start + 1, bound, cond);
if (start == NOT_FOUND) {
return NOT_FOUND;
}
if (fChar == pair[OPENING_PEER]) {
depth++;
} else {
depth--;
}
if (depth == 0) {
return start;
}
}
}
catch (final BadLocationException e) {
return NOT_FOUND;
}
}
@Override
public int findOpeningPeer(int start, final char[] pair) {
if (start >= fDocument.getLength()) {
start = fDocument.getLength()-1;
}
try {
final StopCondition cond = createFindPeerStopCondition(start, pair, (char) 0);
final int bound = createBackwardBound(start);
int depth= 1;
start += 1;
while (true) {
start = scanBackward(start - 1, bound, cond);
if (start == NOT_FOUND) {
return NOT_FOUND;
}
if (fChar == pair[CLOSING_PEER]) {
depth++;
} else {
depth--;
}
if (depth == 0) {
return start;
}
}
}
catch (final BadLocationException e) {
return NOT_FOUND;
}
}
@Override
public int findOpeningPeer(int start, final char[] pair, final char escapeChar) {
Assert.isTrue(start < fDocument.getLength());
if (escapeChar == (char) 0) {
return findOpeningPeer(start, pair);
}
try {
final StopCondition cond = createFindPeerStopCondition(start, pair, escapeChar);
final int bound = createBackwardBound(start);
int depth= 1;
start += 1;
fLine = fDocument.getLineOfOffset(start);
while (true) {
final int[] list = preScanBackward(start - 1, bound, cond);
if (list == null) {
return NOT_FOUND;
}
for (int i = list.length-1; i >= 0; i--) {
start = list[i];
if (fDocument.getChar(start) == pair[CLOSING_PEER]) {
depth++;
} else {
depth--;
}
if (depth == 0) {
return start;
}
}
start = fDocument.getLineOffset(fLine+1);
}
}
catch (final BadLocationException e) {
return NOT_FOUND;
}
}
public int computePairBalance(int backwardOffset, final int backwardBound,
int forwardOffset, final int forwardBound, final int initial,
final char[] pair, final char escapeChar) {
int balance= 0;
final StopCondition condition= createFindPeerStopCondition(forwardBound, pair, escapeChar);
ITER_BACKWARD : while (--backwardOffset >= 0) {
backwardOffset= scanBackward(backwardOffset, backwardBound, condition);
if (backwardOffset != NOT_FOUND) {
if (this.fChar == pair[OPENING_PEER]) {
balance++;
}
else {
balance--;
}
}
else {
break ITER_BACKWARD;
}
}
if (balance < 0) {
balance= 0;
}
balance+= initial;
ITER_FORWARD : while (forwardOffset < forwardBound) {
forwardOffset= scanForward(forwardOffset, forwardBound, condition);
if (forwardOffset != NOT_FOUND) {
if (this.fChar == pair[OPENING_PEER]) {
balance++;
}
else {
balance--;
}
if (balance == 0) {
break ITER_FORWARD;
}
forwardOffset++;
}
else {
break ITER_FORWARD;
}
}
return balance;
}
/**
* Finds the smallest position in <code>fDocument</code> such that the position is >= <code>position</code>
* and < <code>bound</code> and <code>Character.isWhitespace(fDocument.getChar(pos))</code> evaluates to <code>false</code>.
*
* @param position the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> > <code>position</code>, or <code>UNBOUND</code>
* @return the smallest position of a non-whitespace character in [<code>position</code>, <code>bound</code>), or <code>NOT_FOUND</code> if none can be found
*/
public final int findAnyNonBlankForward(final int position, final int bound, final boolean linebreakIsBlank) {
return scanForward(position, bound, linebreakIsBlank ?
getAnyNonWSorLRCondition() : getAnyNonWSCondition());
}
public final int findAnyNonBlankBackward(final int position, final int bound, final boolean linebreakIsBlank) {
return scanBackward(position-1, bound, linebreakIsBlank ?
getAnyNonWSorLRCondition() : getAnyNonWSCondition());
}
public final int findNonBlankForward(final int position, final int bound, final boolean linebreakIsBlank) {
return scanForward(position, bound, linebreakIsBlank ?
getNonWSorLRCondition() : getNonWSCondition());
}
public final int findNonBlankBackward(final int position, final int bound, final boolean linebreakIsBlank) {
return scanBackward(position-1, bound, linebreakIsBlank ?
getNonWSorLRCondition() : getNonWSCondition());
}
public IRegion findBlankRegion(final int position, final boolean linebreakIsBlank) {
return findRegion(position, linebreakIsBlank ?
getAnyNonWSorLRCondition() : getAnyNonWSCondition());
}
public boolean isBlankLine(final int position) throws BadLocationException {
final IRegion line = fDocument.getLineInformationOfOffset(position);
if (line.getLength() > 0) {
final int nonWhitespace = findAnyNonBlankForward(line.getOffset(), line.getOffset()+line.getLength(), false);
return (nonWhitespace == NOT_FOUND);
}
return true;
}
public final IRegion findCommonWord(final int position) {
return findRegion(position, new StopCondition() {
@Override
public boolean stop() {
return (!Character.isLetterOrDigit(fChar));
}
});
}
public final int getFirstLineOfRegion(final IRegion region) throws BadLocationException {
return fDocument.getLineOfOffset(region.getOffset());
}
public final int getLastLineOfRegion(final IRegion region) throws BadLocationException {
if (region.getLength() == 0) {
return fDocument.getLineOfOffset(region.getOffset());
}
return fDocument.getLineOfOffset(region.getOffset()+region.getLength()-1);
}
private final int[] preScanBackward(final int start, final int bound, final StopCondition condition) throws BadLocationException {
final IntList list = new ArrayIntList();
int scanEnd = start+1;
NEXT_LINE: while (list.isEmpty() && fLine >= 0) {
final int lineOffset = fDocument.getLineOffset(fLine);
int next = lineOffset - 1;
while ((next = scanForward(next + 1, scanEnd, condition)) != NOT_FOUND) {
if (bound < next) {
list.add(next);
}
}
fLine--;
if (lineOffset <= bound) {
break NEXT_LINE;
}
scanEnd = lineOffset;
}
if (!list.isEmpty()) {
return list.toArray();
}
return null;
}
/**
* Finds the lowest position <code>p</code> in <code>fDocument</code> such that <code>start</code> <= p <
* <code>bound</code> and <code>condition.stop(fDocument.getChar(p), p)</code> evaluates to <code>true</code>.
*
* @param start the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> > <code>start</code>, or <code>UNBOUND</code>
* @param condition the <code>StopCondition</code> to check
* @return the lowest position in [<code>start</code>, <code>bound</code>) for which <code>condition</code> holds, or <code>NOT_FOUND</code> if none can be found
*/
protected final int scanForward(final int start, int bound, final StopCondition condition) {
if (bound == UNBOUND) {
bound = fDocument.getLength();
}
assert(bound <= fDocument.getLength());
assert(start >= 0);
try {
fPos = start;
while (fPos < bound) {
fChar = fDocument.getChar(fPos);
if (condition.stop()) {
return fPos;
}
fPos = condition.nextPositionForward();
}
fPos = bound;
fChar = (fPos >= 0 && fPos < fDocument.getLength()) ? fDocument.getChar(fPos) : (char) -1;
}
catch (final BadLocationException e) {
}
return NOT_FOUND;
}
/**
* Finds the lowest position in <code>fDocument</code> such that the position is >= <code>position</code>
* and < <code>bound</code> and <code>fDocument.getChar(position) == ch</code> evaluates to <code>true</code>
* and the position is in the default partition.
*
* @param position the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> > <code>position</code>, or <code>UNBOUND</code>
* @param ch the <code>char</code> to search for
* @return the lowest position of <code>ch</code> in (<code>bound</code>, <code>position</code>] that resides in a Java partition, or <code>NOT_FOUND</code> if none can be found
*/
public final int scanForward(final int position, final int bound, final char ch) {
return scanForward(position, bound, new SingleCharacterMatchCondition(ch));
}
/**
* Finds the lowest position in <code>fDocument</code> such that the position is >= <code>position</code>
* and < <code>bound</code> and <code>fDocument.getChar(position) == ch</code> evaluates to <code>true</code> for at least one
* ch in <code>chars</code> and the position is in the default partition.
*
* @param position the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> > <code>position</code>, or <code>UNBOUND</code>
* @param chars an array of <code>char</code> to search for
* @return the lowest position of a non-whitespace character in [<code>position</code>, <code>bound</code>) that resides in a Java partition, or <code>NOT_FOUND</code> if none can be found
*/
public final int scanForward(final int position, final int bound, final char[] chars) {
return scanForward(position, bound, new CharacterMatchCondition(chars));
}
/** Provisional / Not Tested */
public final int scanForward(final int position, int bound, final String s, final char escapeChar) {
if (bound == UNBOUND) {
bound = fDocument.getLength();
}
bound -= s.length();
return scanForward(position, bound, new StringMatchCondition(s, escapeChar));
}
/**
* Finds the highest position <code>p</code> in <code>fDocument</code> such that <code>bound</code> < <code>p</code> <= <code>start</code>
* and <code>condition.stop(fDocument.getChar(p), p)</code> evaluates to <code>true</code>.
*
* @param start the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> < <code>start</code>, or <code>UNBOUND</code>
* @param condition the <code>StopCondition</code> to check
* @return the highest position in (<code>bound</code>, <code>start</code> for which <code>condition</code> holds, or <code>NOT_FOUND</code> if none can be found
*/
protected final int scanBackward(final int start, int bound, final StopCondition condition) {
if (bound == UNBOUND) {
bound = -1;
}
assert(bound >= -1);
// assert(start == 0 || start < fDocument.getLength() );
try {
if (fDocument.getLength() > 0) {
fPos = start;
while (fPos > bound) {
fChar = fDocument.getChar(fPos);
if (condition.stop()) {
return fPos;
}
fPos = condition.nextPositionBackward();
}
}
fPos = bound;
fChar = (fPos >= 0 && fPos < fDocument.getLength()) ? fDocument.getChar(fPos) : (char) -1;
}
catch (final BadLocationException e) {
}
return NOT_FOUND;
}
/**
* Finds the highest position in <code>fDocument</code> such that the position is <= <code>position</code>
* and > <code>bound</code> and <code>fDocument.getChar(position) == ch</code> evaluates to <code>true</code> for at least one
* ch in <code>chars</code> and the position is in the default partition.
*
* @param position the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> < <code>position</code>, or <code>UNBOUND</code>
* @param ch the <code>char</code> to search for
* @return the highest position of one element in <code>chars</code> in (<code>bound</code>, <code>position</code>] that resides in a Java partition, or <code>NOT_FOUND</code> if none can be found
*/
public final int scanBackward(final int position, final int bound, final char ch) {
return scanBackward(position, bound, new SingleCharacterMatchCondition(ch));
}
/**
* Finds the highest position in <code>fDocument</code> such that the position is <= <code>position</code>
* and > <code>bound</code> and <code>fDocument.getChar(position) == ch</code> evaluates to <code>true</code> for at least one
* ch in <code>chars</code> and the position is in the default partition.
*
* @param position the first character position in <code>fDocument</code> to be considered
* @param bound the first position in <code>fDocument</code> to not consider any more, with <code>bound</code> < <code>position</code>, or <code>UNBOUND</code>
* @param chars an array of <code>char</code> to search for
* @return the highest position of one element in <code>chars</code> in (<code>bound</code>, <code>position</code>] that resides in a Java partition, or <code>NOT_FOUND</code> if none can be found
*/
public final int scanBackward(final int position, final int bound, final char[] chars) {
return scanBackward(position, bound, new CharacterMatchCondition(chars));
}
public final int count(int start, final int stop, final char c) {
int count = 0;
final SingleCharacterMatchCondition condition = new SingleCharacterMatchCondition(c);
while (start < stop && (start = scanForward(start, stop, condition)) != NOT_FOUND) {
count++;
start++;
}
return count;
}
protected final IRegion findRegion(final int position, final StopCondition condition) {
return findRegion(position, condition, false);
}
protected final IRegion findRegion(final int position, final StopCondition condition, final boolean allowClosing) {
int start = position;
int end = scanForward(position, UNBOUND, condition);
if (end == NOT_FOUND) {
end = fPos;
}
if (allowClosing || end > position) {
start = scanBackward(--start, UNBOUND, condition);
if (start == NOT_FOUND) {
start = fPos;
}
start++;
}
if (start < end) {
return new Region(start, end-start);
}
return null;
}
/**
* Returns the partition at <code>position</code>.
*
* @param position the position to get the partition for
* @return the content type at <code>position</code> or a dummy zero-length
* partition if accessing the document fails
*/
protected final String getContentType() {
try {
return TextUtilities.getContentType(fDocument, this.partitioning, fPos, false);
}
catch (final BadLocationException e) {
return null; // ?
}
}
/**
* Returns the partition at current position of the scanner (#fPos).
*
* @param position the position to get the partition for
* @return the partition at <code>position</code> or a dummy zero-length
* partition if accessing the document fails
*/
protected final ITypedRegion getPartition() {
try {
return TextUtilities.getPartition(fDocument, this.partitioning, fPos, false);
}
catch (final BadLocationException e) {
return new TypedRegion(fPos, 0, "__no_partition_at_all"); //$NON-NLS-1$
}
}
/**
* Returns the partition at <code>position</code>.
*
* @param position the position to get the partition for
* @return the partition at <code>position</code> or a dummy zero-length
* partition if accessing the document fails
*/
public final ITypedRegion getPartition(final int position) {
try {
return TextUtilities.getPartition(fDocument, this.partitioning, position, false);
}
catch (final BadLocationException e) {
return new TypedRegion(fPos, 0, "__no_partition_at_all"); //$NON-NLS-1$
}
}
}