/**
 * Copyright 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.waveprotocol.wave.model.document.util;

import org.waveprotocol.wave.model.document.ReadableDocument;
import org.waveprotocol.wave.model.document.util.Point.Tx;

/**
 * Utilities for locating text inside a contiguous run of sibling text nodes
 * of a document.
 *
 * All searches start at a text point and walk sibling text nodes in the
 * requested direction; they stop at the first sibling that is not a text
 * node.
 */
public final class TextLocator {

  /**
   * Locates an offset inside a single string according to some criteria.
   */
  private interface CharacterLocator {
    /**
     * @param data the string to search
     * @param start the offset at which the search begins
     * @param forward if true search towards the end of the string, else
     *        towards its beginning
     * @return the located offset, or -1 if nothing matched
     */
    int indexOf(String data, int start, boolean forward);
  }

  /**
   * Checks if a character satisfies some criteria.
   */
  public interface CharacterPredicate {
    boolean apply(char c);
  }

  /**
   * Utility class.
   */
  private TextLocator() {}

  /**
   * Locator that searches for a fixed string.
   *
   * The string is matched as a whole (as a contiguous substring), using
   * {@link String#indexOf(String, int)} when searching forwards and
   * {@link String#lastIndexOf(String, int)} when searching backwards. Note
   * that the backward search therefore also reports a match that begins
   * exactly at the start offset.
   */
  private static final class CharacterLocatorImpl implements CharacterLocator {
    private final String characters;

    public CharacterLocatorImpl(String characters) {
      this.characters = characters;
    }

    @Override
    public int indexOf(String data, int start, boolean forward) {
      if (forward) {
        return data.indexOf(characters, start);
      }
      return data.lastIndexOf(characters, start);
    }
  }

  /**
   * Locator that reports the boundary offset next to the nearest character
   * accepted by a predicate.
   *
   * Searching forwards yields the index of the first accepted character at or
   * after the start offset. Searching backwards inspects the characters
   * strictly before the start offset and yields the offset just after the
   * first accepted one. In both directions -1 means no character was
   * accepted.
   */
  private static final class PredicateBoundaryLocator implements CharacterLocator {
    private final CharacterPredicate pred;

    public PredicateBoundaryLocator(CharacterPredicate pred) {
      this.pred = pred;
    }

    private int findForwards(String data, int start) {
      for (int i = start; i < data.length(); i++) {
        if (pred.apply(data.charAt(i))) {
          return i;
        }
      }
      return -1;
    }

    private int findBackwards(String data, int start) {
      for (int i = start - 1; i >= 0; i--) {
        if (pred.apply(data.charAt(i))) {
          // The boundary lies after the accepted character.
          return i + 1;
        }
      }
      return -1;
    }

    @Override
    public int indexOf(String data, int start, boolean forward) {
      return forward ? findForwards(data, start) : findBackwards(data, start);
    }
  }

  /**
   * Matches "word" characters
   */
  private static final CharacterLocator wordCharactersBoundaryLocator =
      new PredicateBoundaryLocator(new CharacterPredicate() {
        @Override
        public boolean apply(char c) {
          return isWordCharacter(c);
        }
      });

  /**
   * Matches "non-word" characters
   */
  private static final CharacterLocator nonWordCharactersBoundaryLocator =
      new PredicateBoundaryLocator(new CharacterPredicate() {
        @Override
        public boolean apply(char c) {
          return !isWordCharacter(c);
        }
      });

  /**
   * Contains characters treated as whitespace for the purpose of finding an
   * appropriate insertion point when inserting a doodad.
   */
  private static final String INLINE_WHITESPACE = " \t";

  private static boolean isInlineWhitespace(char c) {
    return INLINE_WHITESPACE.indexOf(c) != -1;
  }

  /**
   * Predicate that matches inline whitespace characters (space and tab).
   */
  public static final CharacterPredicate WHITESPACE_MATCHER = new CharacterPredicate() {
    @Override
    public boolean apply(char c) {
      return isInlineWhitespace(c);
    }
  };

  /**
   * Predicate that matches every character that is not inline whitespace,
   * i.e. anything other than space and tab.
   */
  public static final CharacterPredicate NON_WHITESPACE_MATCHER = new CharacterPredicate() {
    @Override
    public boolean apply(char c) {
      return !isInlineWhitespace(c);
    }
  };

  /**
   * The intent of this function is to check for "word characters" i.e. letters
   * and digits and other characters that are considered part of a word such as
   * _.
   *
   * At the moment, we are approximating this by using
   * Character.isLetterOrDigit, but this doesn't work with unicode letters in
   * GWT.
   *
   * We can also consider regexp match with \w which should match with
   * "word characters", but in current browsers, these don't match with unicode
   * letters.
   */
  private static boolean isWordCharacter(char c) {
    return c == '_' || Character.isLetterOrDigit(c);
  }

  /**
   * Runs the locator over the data of the single text node containing
   * {@code start}.
   *
   * @return a text point in the same node at the located offset, or null if
   *         the locator found nothing in this node
   */
  private static <N, E extends N, T extends N> Tx<N> findCharacterInNode(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterLocator locator, boolean forward) {
    N container = start.getContainer();
    assert container != null && doc.asText(container) != null;

    String data = doc.getData(doc.asText(container));
    int index = locator.indexOf(data, start.getTextOffset(), forward);
    if (index == -1) {
      return null;
    }
    return Point.inText(container, index);
  }

  /**
   * Locates characters in a contiguous sequence of text nodes.
   *
   * The node containing {@code start} is searched from the start offset; each
   * following sibling text node (in the given direction) is then searched
   * from its near edge. The walk ends at the first sibling that is missing or
   * is not a text node.
   *
   * @param doc the document the nodes belong to
   * @param start the point at which the search begins
   * @param locator the criteria to look for
   * @param forward if true search forwards, else search backwards
   * @return the location where the locator matched, or null if not found
   */
  private static <N, E extends N, T extends N> Tx<N> locateCharacters(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterLocator locator, boolean forward) {
    Tx<N> current = start;
    N node = start.getContainer();

    while (true) {
      Tx<N> found = findCharacterInNode(doc, current.asTextPoint(), locator, forward);
      if (found != null) {
        return found;
      }

      node = forward ? doc.getNextSibling(node) : doc.getPreviousSibling(node);
      // A missing sibling is checked explicitly rather than relying on
      // asText(null) returning null.
      T text = (node == null) ? null : doc.asText(node);
      if (text == null) {
        return null;
      }

      if (forward) {
        current = Point.inText(node, 0);
      } else {
        current = Point.inText(node, doc.getLength(text));
      }
    }
  }

  /**
   * Runs {@link #locateCharacters} and falls back to the last point of the
   * text node sequence (in the search direction) when nothing is found.
   */
  private static <N, E extends N, T extends N> Tx<N> locateOrSequenceEnd(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterLocator locator, boolean forward) {
    Tx<N> boundary = locateCharacters(doc, start, locator, forward);
    if (boundary == null) {
      boundary = lastPointInTextSequence(doc, start, forward);
    }
    return boundary;
  }

  /**
   * Find the next point in the neighbouring sequence of text nodes at which
   * the given string occurs.
   *
   * The string is matched as a whole, as a contiguous substring of a single
   * text node's data; it is not treated as a set of alternative characters.
   *
   * Returns the last point in the text node sequence if we cannot find a match.
   *
   * @param doc the document to search in
   * @param start the point we want to start the search
   * @param characters the string that we want to match
   * @param forward if true search forwards, else search backwards.
   */
  public static <N, E extends N, T extends N> Tx<N> findCharacter(
      ReadableDocument<N, E, T> doc, Tx<N> start, String characters, boolean forward) {
    return locateOrSequenceEnd(doc, start, new CharacterLocatorImpl(characters), forward);
  }

  /**
   * Find the next point in the neighbouring sequence of text nodes where
   * the character there matches some criteria.
   *
   * When searching forwards the returned point is just before the matching
   * character; when searching backwards it is just after it.
   *
   * Returns the last point in the text node sequence if we cannot find a match.
   *
   * @param doc the document to search in
   * @param start the point we want to start the search
   * @param pred the criteria for the character we want to match
   * @param forward if true search forwards, else search backwards.
   */
  public static <N, E extends N, T extends N> Tx<N> findCharacterBoundary(
      ReadableDocument<N, E, T> doc, Tx<N> start, CharacterPredicate pred, boolean forward) {
    return locateOrSequenceEnd(doc, start, new PredicateBoundaryLocator(pred), forward);
  }

  /**
   * Gets the next word boundary
   *
   * The search first looks for a word character in the given direction and
   * then, from there, for the first non-word character. If either step finds
   * nothing, the last point of the text node sequence in that direction is
   * returned.
   *
   * NOTE(user): At the moment this only works in a contiguous sequence of text
   * nodes.
   *
   * @param start the point to start the search
   * @param doc the document to search in
   * @param forward if true, search forwards, else search backwards
   * @return the word boundary, or null if {@code start} has no text point
   *         representation
   */
  public static <N, E extends N, T extends N> Tx<N> getWordBoundary(Point<N> start,
      ReadableDocument<N, E, T> doc, boolean forward) {
    Tx<N> startAsTx = start.asTextPoint();
    if (startAsTx == null) {
      return null;
    }

    Tx<N> boundary = null;
    Tx<N> firstWordCharacter =
        locateCharacters(doc, startAsTx, wordCharactersBoundaryLocator, forward);
    if (firstWordCharacter != null) {
      boundary =
          locateCharacters(doc, firstWordCharacter, nonWordCharactersBoundaryLocator, forward);
    }

    if (boundary == null) {
      boundary = lastPointInTextSequence(doc, startAsTx, forward);
    }
    return boundary;
  }

  /**
   * This method returns the last point in a sequence of text nodes in the
   * given direction: the end of the last text node when going forwards, or
   * offset 0 of the first text node when going backwards.
   *
   * We guarantee that the return value is non-null and inside a text node.
   *
   * @return the last point in the text sequence as a text point.
   */
  private static <N, E extends N, T extends N> Tx<N> lastPointInTextSequence(
      ReadableDocument<N, E, T> doc, Tx<N> start, boolean forward) {
    T text = doc.asText(start.getCanonicalNode());

    while (true) {
      N sibling = forward ? doc.getNextSibling(text) : doc.getPreviousSibling(text);
      // A missing sibling is checked explicitly rather than relying on
      // asText(null) returning null.
      T adjacent = (sibling == null) ? null : doc.asText(sibling);
      if (adjacent == null) {
        break;
      }
      text = adjacent;
    }

    int offset = forward ? doc.getLength(text) : 0;
    return Point.<N> inText(text, offset);
  }
}