/**
* Copyright 2009 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.waveprotocol.wave.model.document.util;
import org.waveprotocol.wave.model.document.ReadableDocument;
import org.waveprotocol.wave.model.document.util.Point.Tx;
/**
* Class for locating text in a document.
*
*/
public final class TextLocator {

  /**
   * Strategy for finding an offset inside the data of a single text node.
   */
  private interface CharacterLocator {
    /**
     * @param data the text to search
     * @param start the offset at which the search begins
     * @param forward true to search towards the end of {@code data}, false to
     *        search towards its beginning
     * @return the located offset, or -1 if nothing in {@code data} matches
     */
    int indexOf(String data, int start, boolean forward);
  }

  /**
   * Checks if a character satisfies some criteria.
   */
  public interface CharacterPredicate {
    boolean apply(char c);
  }

  /**
   * Locator that looks for an occurrence of a fixed string.
   *
   * The search delegates to {@link String#indexOf(String, int)} when going
   * forwards and to {@link String#lastIndexOf(String, int)} when going
   * backwards, so the string is matched as a whole sequence.
   *
   * NOTE(review): the public entry point describes its argument as a "set of
   * characters"; for strings longer than one character that is not what this
   * does. Confirm against callers before changing.
   *
   * NOTE(review): the backward search can report a match that begins exactly
   * at {@code start}, and returns the index of the match itself rather than
   * the offset after it (unlike {@link PredicateBoundaryLocator}). Confirm
   * that this asymmetry is intended.
   */
  private static final class SubstringLocator implements CharacterLocator {
    private final String needle;

    SubstringLocator(String needle) {
      this.needle = needle;
    }

    @Override
    public int indexOf(String data, int start, boolean forward) {
      if (forward) {
        return data.indexOf(needle, start);
      }
      return data.lastIndexOf(needle, start);
    }
  }

  /**
   * Locator that finds the boundary next to the nearest character accepted by
   * a predicate.
   *
   * Searching forwards yields the offset just before the matching character;
   * searching backwards yields the offset just after it.
   */
  private static final class PredicateBoundaryLocator implements CharacterLocator {
    private final CharacterPredicate pred;

    PredicateBoundaryLocator(CharacterPredicate pred) {
      this.pred = pred;
    }

    /** Scans from {@code start} to the end of {@code data}. */
    private int findForwards(String data, int start) {
      int offset = start;
      while (offset < data.length()) {
        if (pred.apply(data.charAt(offset))) {
          return offset;
        }
        offset++;
      }
      return -1;
    }

    /**
     * Scans the characters before {@code start}, nearest first. Here
     * {@code offset} is always the boundary to the right of the character
     * being tested.
     */
    private int findBackwards(String data, int start) {
      int offset = start;
      while (offset > 0) {
        if (pred.apply(data.charAt(offset - 1))) {
          return offset;
        }
        offset--;
      }
      return -1;
    }

    @Override
    public int indexOf(String data, int start, boolean forward) {
      if (forward) {
        return findForwards(data, start);
      }
      return findBackwards(data, start);
    }
  }

  /**
   * Predicate over "word" characters. Depending on the flag it accepts either
   * exactly the word characters or exactly the non-word characters.
   */
  private static final class WordCharacterPredicate implements CharacterPredicate {
    private final boolean acceptWordCharacters;

    WordCharacterPredicate(boolean acceptWordCharacters) {
      this.acceptWordCharacters = acceptWordCharacters;
    }

    @Override
    public boolean apply(char c) {
      return isWordCharacter(c) == acceptWordCharacters;
    }
  }

  /**
   * Predicate over inline whitespace. Depending on the flag it accepts either
   * exactly the inline whitespace characters or exactly everything else.
   */
  private static final class InlineWhitespacePredicate implements CharacterPredicate {
    private final boolean acceptWhitespace;

    InlineWhitespacePredicate(boolean acceptWhitespace) {
      this.acceptWhitespace = acceptWhitespace;
    }

    @Override
    public boolean apply(char c) {
      return isInlineWhitespace(c) == acceptWhitespace;
    }
  }

  /**
   * Contains characters treated as whitespace for the purpose of finding an
   * appropriate insertion point when inserting a doodad.
   */
  private static final String INLINE_WHITESPACE = " \t";

  /**
   * Finds the boundary next to a "word" character.
   */
  private static final CharacterLocator WORD_CHARACTER_LOCATOR =
      new PredicateBoundaryLocator(new WordCharacterPredicate(true));

  /**
   * Finds the boundary next to a "non-word" character.
   */
  private static final CharacterLocator NON_WORD_CHARACTER_LOCATOR =
      new PredicateBoundaryLocator(new WordCharacterPredicate(false));

  /**
   * Predicate that matches inline whitespace characters.
   */
  public static final CharacterPredicate WHITESPACE_MATCHER =
      new InlineWhitespacePredicate(true);

  /**
   * Predicate that matches every character that is not inline whitespace.
   */
  public static final CharacterPredicate NON_WHITESPACE_MATCHER =
      new InlineWhitespacePredicate(false);

  /**
   * Utility class; not instantiable.
   */
  private TextLocator() {}

  /**
   * @return true if {@code c} is one of the {@link #INLINE_WHITESPACE}
   *         characters.
   */
  private static boolean isInlineWhitespace(char c) {
    return INLINE_WHITESPACE.indexOf(c) >= 0;
  }

  /**
   * Decides whether a character counts as part of a word, i.e. letters, digits
   * and other characters considered part of a word such as _.
   *
   * This is currently approximated with Character.isLetterOrDigit plus an
   * explicit check for the underscore. That approximation does not work with
   * unicode letters in GWT. A regexp match against \w was considered as an
   * alternative, but in current browsers it does not match unicode letters
   * either.
   */
  private static boolean isWordCharacter(char c) {
    return c == '_' || Character.isLetterOrDigit(c);
  }

  /**
   * Runs the locator over the data of the single text node containing
   * {@code start}.
   *
   * @param doc document used to read the node's text
   * @param start point inside a text node; its offset is where the search begins
   * @param locator search strategy
   * @param forward search direction
   * @return a point in the same text node at the located offset, or null if
   *         the locator reports no match
   */
  private static <N, E extends N, T extends N> Tx<N> findCharacterInNode(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterLocator locator, boolean forward) {
    N container = start.getContainer();
    assert container != null && doc.asText(container) != null;

    String data = doc.getData(doc.asText(container));
    int offset = locator.indexOf(data, start.getTextOffset(), forward);
    if (offset == -1) {
      return null;
    }
    return Point.inText(container, offset);
  }

  /**
   * Locates characters in a contiguous sequence of sibling text nodes.
   *
   * The node containing {@code start} is searched from the start offset. Each
   * following sibling (in the search direction) is then searched from its near
   * edge, until the locator matches or a sibling that is not a text node is
   * reached.
   *
   * @param doc document used to navigate and read the nodes
   * @param start point inside a text node where the search begins
   * @param locator search strategy
   * @param forward if true search forwards, else search backwards
   * @return the matching location, or null if not found
   */
  private static <N, E extends N, T extends N> Tx<N> locateCharacters(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterLocator locator, boolean forward) {
    N node = start.getContainer();
    Tx<N> cursor = start;

    for (;;) {
      Tx<N> match = findCharacterInNode(doc, cursor.asTextPoint(), locator, forward);
      if (match != null) {
        return match;
      }

      if (forward) {
        node = doc.getNextSibling(node);
      } else {
        node = doc.getPreviousSibling(node);
      }

      T text = doc.asText(node);
      if (text == null) {
        // Ran out of adjacent text nodes without a match.
        return null;
      }

      // Enter the sibling at the edge nearest to where we came from.
      int entryOffset = forward ? 0 : doc.getLength(text);
      cursor = Point.inText(node, entryOffset);
    }
  }

  /**
   * Shared implementation of the public finders: returns what the locator
   * finds, or the far end of the text node sequence if it finds nothing.
   */
  private static <N, E extends N, T extends N> Tx<N> locateOrSequenceEnd(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterLocator locator, boolean forward) {
    Tx<N> match = locateCharacters(doc, start, locator, forward);
    if (match != null) {
      return match;
    }
    return lastPointInTextSequence(doc, start, forward);
  }

  /**
   * Find the next point in the neighbouring sequence of text nodes at which
   * the given string occurs.
   *
   * Returns the last point in the text node sequence if we cannot find a match.
   *
   * NOTE(review): {@code characters} is matched as one contiguous string, not
   * as a set of alternative characters; verify that this is what callers
   * expect.
   *
   * @param doc the document to search in
   * @param start the point we want to start the search
   * @param characters characters that we want to match
   * @param forward if true search forwards, else search backwards.
   */
  public static <N, E extends N, T extends N> Tx<N> findCharacter(
      ReadableDocument<N, E, T> doc, Tx<N> start, String characters, boolean forward) {
    return locateOrSequenceEnd(doc, start, new SubstringLocator(characters), forward);
  }

  /**
   * Find the next point in the neighbouring sequence of text nodes where the
   * adjacent character matches some criteria.
   *
   * Returns the last point in the text node sequence if we cannot find a match.
   *
   * @param doc the document to search in
   * @param start the point we want to start the search
   * @param pred the criteria for the character we want to match
   * @param forward if true search forwards, else search backwards.
   */
  public static <N, E extends N, T extends N> Tx<N> findCharacterBoundary(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterPredicate pred, boolean forward) {
    return locateOrSequenceEnd(doc, start, new PredicateBoundaryLocator(pred), forward);
  }

  /**
   * Gets the next word boundary.
   *
   * The search first moves to the nearest word character in the given
   * direction, then continues in the same direction to the nearest non-word
   * character. If either step finds nothing, the last point of the text node
   * sequence in that direction is returned instead.
   *
   * NOTE(user): At the moment this only works in a contiguous sequence of text
   * nodes.
   *
   * @param start the point to start the search
   * @param doc the document to search in
   * @param forward if true, search forwards, else search backwards
   * @return the word boundary, or null if {@code start} has no text point
   *         representation
   */
  public static <N, E extends N, T extends N> Tx<N> getWordBoundary(Point<N> start,
      ReadableDocument<N, E, T> doc, boolean forward) {
    Tx<N> origin = start.asTextPoint();
    if (origin == null) {
      return null;
    }

    Tx<N> wordEdge = locateCharacters(doc, origin, WORD_CHARACTER_LOCATOR, forward);
    if (wordEdge != null) {
      Tx<N> boundary = locateCharacters(doc, wordEdge, NON_WORD_CHARACTER_LOCATOR, forward);
      if (boundary != null) {
        return boundary;
      }
    }
    return lastPointInTextSequence(doc, origin, forward);
  }

  /**
   * Returns the last point in a sequence of text nodes in the given direction.
   *
   * Starting at the node of {@code start}, this steps across siblings for as
   * long as the neighbouring sibling is a text node. The result is the end of
   * the final text node when going forwards, or offset 0 of the final text
   * node when going backwards.
   *
   * @return the last point in the text sequence as a text point.
   */
  private static <N, E extends N, T extends N> Tx<N> lastPointInTextSequence(
      ReadableDocument<N, E, T> doc, Tx<N> start, boolean forward) {
    T text = doc.asText(start.getCanonicalNode());

    for (;;) {
      N sibling = forward ? doc.getNextSibling(text) : doc.getPreviousSibling(text);
      T neighbour = doc.asText(sibling);
      if (neighbour == null) {
        break;
      }
      text = neighbour;
    }

    int edgeOffset = forward ? doc.getLength(text) : 0;
    return Point.<N> inText(text, edgeOffset);
  }
}