// Copyright 2012 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.collide.client.document.linedimensions; import com.google.collide.client.document.linedimensions.ColumnOffsetCache.ColumnOffset; import com.google.collide.client.util.dom.FontDimensionsCalculator; import com.google.collide.client.util.dom.FontDimensionsCalculator.FontDimensions; import com.google.collide.json.shared.JsonArray; import com.google.collide.json.shared.JsonStringMap; import com.google.collide.shared.Pair; import com.google.collide.shared.document.Document; import com.google.collide.shared.document.Line; import com.google.collide.shared.document.TextChange; import com.google.collide.shared.document.Document.PreTextListener; import com.google.collide.shared.document.Document.TextListener; import com.google.collide.shared.document.TextChange.Type; import com.google.collide.shared.util.JsonCollections; import com.google.collide.shared.util.StringUtils; import com.google.collide.shared.util.UnicodeUtils; import com.google.collide.shared.util.ListenerRegistrar.RemoverManager; import com.google.gwt.regexp.shared.MatchResult; import com.google.gwt.regexp.shared.RegExp; /** * An object which can accurately measure a {@link Line} and map X coordinates * to columns and vice versa. */ /* * TL;DR; * * We have a fast regex to tell whether a line can use the naive calculations * for column/x. 
If it can't, we have to put it in a span and measure special * characters then store how much they affect the width of our columns. Tabs * which are prefixing indentation and suffix carriage returns are special cased * and don't require measurements. */ /* * Implementation Details * * There are three states a line can be in: * * 1. Unknown, this is represented by the lack of a NEEDS_OFFSET tag on the * Line. This is the default line state and means that a user (or this class) * has yet to visit this line in the document. * * 2. No offset needed (false NEEDS_OFFSET value), this state indicates that * we've visited the line and decided it has no characters which warrant special * attention. Prefix tabs and suffix carriage returns are included in this state * since they are a common case. If they exist they will be handled in some very * simple offset code. * * 3. Offset needed (true NEEDS_OFFSET value), this state indicates that there * are special characters within this line. This includes tabs which appear in * the middle of a line or carriage-returns which aren't at the end followed by * a \n. In this state a ColumnOffsetCache is built and put onto the line. It is * lazily constructed as columns are requested. * * ColumnOffsetCache internals: * * Once built and attached to a line, the column offset cache maintains a cache * entry for each special character on a line. The entry will be made for the * column immediately following the special character and the x value of that * entry will represent the left edge of that column (the right edge of the * column with the special character). * * All examples assume double-wide characters are 10px and normal characters are * 5px wide, combining marks are 0px. * * For example, if the line is "烏烏龍茶\n" the cache will contain: * * [{column: 1, x: 10px }, {column: 2, x: 20px }, {column: 3, x: 30px }, * {column: 4, x: 40px }] * * Note that this last entry exists regardless of the \n existing. 
So the last * entry may be a column which does not exist in the string. * * Using the example, when column 2's x is requested (that is the third * character in the string), entry [1] will be pulled from the cache and the * returned left edge will be 20px. * * A cache entry is only created for special width characters, any interesting * character that turns out to be our same width is ignored as well. * * For example in the case of "烏aaa烏" the cache will contain: * * [{column: 1, x: 10px }, {column: 5, x: 25px }] * * In this example if we were interested in column 2 (that is the 3rd character * in the string or 2nd 'a' character), entry [0] will be pulled from cache, * then 10px + 5px will be added together and a left edge of 15px will be * returned. * * An important note on combining marks: * * Combining marks and some similar characters, show up in the cache as 0 width * characters. That is when they are measured they add no additional width to * the string and function as a zero width column. They will still contain an * entry in cache but it will be marked as isZeroWidth and the x value will * correspond to the same x value of the previous column. * * For example, if the line is "à=à" (note the first a has a combining mark so * the string is really more like "a`=à") the cache will contain: * * [{column: 2, x: 5px, isZeroWidth: true }] * * That means if we look up, say, character 4 (the last à), we will take 5px and * add 5px to get 10px which would put us to the right of the '=' when rendered. * * How the cache is built: * * We scan the string for any characters of interest (we vaguely define that as * any character not part of the original latin alphabet so character code > * 255). We then measure the entire string up to and including that character. * If the string matches the length we'd expect, then we cache that the * character is normal but do not create an offset cache entry. 
Otherwise we
 * store the width of the character so we don't have to measure for it again,
 * then create an offset cache entry to denote that this character affects
 * columns past it since it has an odd width.
 *
 * Current Limitations:
 *
 * Some combining marks combine in both directions (this is mostly script type
 * languages). This means they can affect the width of columns before themselves
 * (aka make the character they combine with narrower). We have a hack to
 * mitigate this but really there's only so much we can do.
 *
 * Measuring can be expensive in long strings containing a large number of
 * different special characters. So if you paste a 500 character string of
 * Katakana, then click at the very end, prepare for a small wait while we catch
 * up. The good news is that will be mitigated the second time since each of
 * those characters' widths is cached and we won't have to layout again.
 *
 * Zoom makes us wipe the entire cache (sucks). Unfortunately this can't be
 * avoided as different characters scale at different factors (double sucks).
 * So we just clear our cache and go about rebuilding.
 *
 * Further comments:
 *
 * I'm not sure how docs does it but they seem pretty quick, they bite it hard
 * on combining marks (the cursor just moves over the letter), but they do
 * handle wider characters fine. The one thing they do have is Arabic makes the
 * cursor go right-to-left but still combining marks don't quite work and in
 * fact some don't render correctly at all (which do otherwise).
 */
public class LineDimensionsCalculator {

  /**
   * Creates a new {@link LineDimensionsCalculator} from a
   * {@link FontDimensionsCalculator}.
   *
   * <p>
   * A callback is registered on the font calculator so that the shared
   * character width cache is dropped whenever the font dimensions change.
   *
   * @param fontCalculator source of font dimensions and change notifications
   * @return a calculator measuring through a {@link BrowserMeasurementProvider}
   */
  public static LineDimensionsCalculator create(FontDimensionsCalculator fontCalculator) {
    final LineDimensionsCalculator calculator =
        new LineDimensionsCalculator(new BrowserMeasurementProvider(fontCalculator));
    // add a listener so that we can clear our cache if the dimensions change.
    fontCalculator.addCallback(new FontDimensionsCalculator.Callback() {
      @Override
      public void onFontDimensionsChanged(FontDimensions fontDimensions) {
        LineDimensionsCalculator.clearCharacterCacheDueToZoomChange();
      }
    });
    return calculator;
  }

  /**
   * Creates a new {@link LineDimensionsCalculator} with a custom
   * {@link MeasurementProvider}.
   *
   * <p>
   * Unlike {@link #create(FontDimensionsCalculator)}, no font-change callback
   * is registered here.
   */
  static LineDimensionsCalculator createWithCustomProvider(MeasurementProvider provider) {
    return new LineDimensionsCalculator(provider);
  }

  /**
   * Specifies how a X-to-column conversion determines the column if the X isn't on the exact column
   * boundary.
   */
  public enum RoundingStrategy {
    ROUND, FLOOR, CEIL;

    /**
     * Collapses a fractional column to an integer column according to this
     * strategy.
     *
     * @param value the fractional column
     * @return the rounded, floored or ceiled value narrowed to an int
     */
    public int apply(double value) {
      switch (this) {
        case ROUND:
          return (int) Math.round(value);
        case FLOOR:
          return (int) Math.floor(value);
        case CEIL:
          return (int) Math.ceil(value);
        default:
          throw new IllegalStateException("Unexpected value for RoundingStrategy");
      }
    }
  }

  /**
   * A cache used to cache the width of special characters, keyed by the
   * single-character string. Would be final except there isn't a fast way to
   * clear a map, so clearing swaps in a fresh instance.
   */
  private static JsonStringMap<Double> characterWidthCache = JsonCollections.createMap();

  /**
   * A listener which notifies us of dirty lines. We only have to handle the
   * case where the endLine != startLine since the startLine is handled in the
   * preTextListener.
   */
  private static final TextListener textListener = new TextListener() {
    @Override
    public void onTextChange(Document document, JsonArray<TextChange> textChanges) {
      for (int i = 0; i < textChanges.size(); i++) {
        TextChange change = textChanges.get(i);
        if (change.getEndLine() != change.getLine()) {
          LineDimensionsUtils.isOffsetNeededAndCache(
              change.getEndLine(), change.getEndColumn(), change.getType());
        }
      }
    }
  };

  /**
   * A listener which allows us to mark the cache dirty before a text change
   * actually takes place.
   */
  private static final PreTextListener preTextListener = new PreTextListener() {
    @Override
    public void onPreTextChange(
        Document document, Type type, Line line, int lineNumber, int column, String text) {
      /*
       * In the case where text is deleted, we only need to mark ourselves dirty
       * if there is already an OffsetCache. The insert case though requires
       * looking at the newly typed text for special characters.
       */
      LineDimensionsUtils.preTextIsOffsetNeededAndCache(line, column, type, text);
    }
  };

  /** Tracks the listener registrations made on the current document. */
  private final RemoverManager listenerManager = new RemoverManager();

  /** Supplies the column width and measures arbitrary strings. */
  private final MeasurementProvider measurementProvider;

  private LineDimensionsCalculator(MeasurementProvider measurementProvider) {
    this.measurementProvider = measurementProvider;
  }

  /**
   * Sets the currently opened document so we can listen for mutations.
   *
   * @param newDocument the document to start listening to
   */
  public void handleDocumentChange(Document newDocument) {
    // Remove old document listener
    listenerManager.remove();
    // add the new ones
    listenerManager.track(newDocument.getPreTextListenerRegistrar().add(preTextListener));
    listenerManager.track(newDocument.getTextListenerRegistrar().add(textListener));
  }

  /**
   * Converts a column to its x coordinate.
   *
   * @param line the line the column belongs to
   * @param column the column whose left edge is wanted
   * @return the x coordinate of the column, 0 for column 0
   */
  public double convertColumnToX(Line line, int column) {
    // Simple case we early out
    if (column == 0) {
      return 0;
    }

    if (!LineDimensionsUtils.needsOffset(line)) {
      return simpleConvertColumnToX(line, column);
    }

    return convertColumnToXMeasuringIfNeeded(line, column);
  }

  /**
   * Converts an x coordinate to the Editor column.
   *
   * @param line the line being hit-tested
   * @param x the x coordinate; values at or below 0 map to column 0
   * @param roundingStrategy how to resolve an x that is not on a column boundary
   * @return the column for the given x
   */
  public int convertXToColumn(Line line, double x, RoundingStrategy roundingStrategy) {
    // Easy out (< can happen when selection dragging).
    if (x <= 0) {
      return 0;
    }

    if (!LineDimensionsUtils.needsOffset(line)) {
      return simpleConvertXToColumn(line, x, roundingStrategy);
    }

    return roundingStrategy.apply(convertXToColumnMeasuringIfNeeded(line, x));
  }

  /**
   * Converts column to x using the {@link ColumnOffsetCache} stored on the
   * line, measuring if required.
   */
  private double convertColumnToXMeasuringIfNeeded(Line line, int column) {
    LineDimensionsUtils.markTimeline(getClass(), "Begin converting Column To X via offset cache.");
    ColumnOffsetCache cache = ColumnOffsetCache.getOrCreate(line, getColumnWidth());
    checkColumnInCacheAndMeasureIfNeeded(cache, line, column);

    ColumnOffset offset = cache.getColumnOffsetForColumn(column);
    LineDimensionsUtils.markTimeline(getClass(), "End converting Column To X via offset cache.");
    return smartColumnToX(offset, column);
  }

  /**
   * Converts x to a column using the {@link ColumnOffsetCache} stored on the
   * line, measuring if needed.
   *
   * @return the (possibly fractional) column for x
   */
  private double convertXToColumnMeasuringIfNeeded(Line line, double x) {
    LineDimensionsUtils.markTimeline(getClass(), "Begin converting X To column via offset cache.");
    ColumnOffsetCache cache = ColumnOffsetCache.getOrCreate(line, getColumnWidth());
    checkXInCacheAndMeasureIfNeeded(cache, line, x);

    Pair<ColumnOffset, Double> offsetAndWidth = cache.getColumnOffsetForX(x, getColumnWidth());
    LineDimensionsUtils.markTimeline(getClass(), "End converting X To column via offset cache.");
    return smartXToColumn(offsetAndWidth.first, offsetAndWidth.second, x);
  }

  /**
   * Smart column to x conversion which converts a column to an x position based
   * on a {@link ColumnOffset}.
   */
  private double smartColumnToX(ColumnOffset offset, int column) {
    if (offset.column == column) {
      return offset.x;
    }

    // Columns between the offset and the requested column are normal width.
    return offset.x + naiveColumnToX(column - offset.column);
  }

  /**
   * Smart x to column conversion which converts an x pixel position to a
   * column based on a {@link ColumnOffset}.
   *
   * @param offset the cache entry to measure from
   * @param width the width used for the column at {@code offset}
   * @param x the x pixel position to convert
   * @return the (possibly fractional) column for x
   */
  private double smartXToColumn(ColumnOffset offset, double width, double x) {
    double column = offset.column;
    if (x == offset.x) {
      return column;
    } else if (x < offset.x + width) {
      /*
       * We are converting this exact column so let's take into account this
       * column's length which may be special.
       */
      column += (x - offset.x) / width;
    } else {
      // Figure out the offset in pixels and subtract then convert.
      column += naiveXToColumn(x - offset.x);
    }

    return column;
  }

  /**
   * Naively converts a column to its expected x value not taking into account
   * any special characters.
   */
  private double naiveColumnToX(double column) {
    return column * getColumnWidth();
  }

  /**
   * Naively converts a x pixel value to its expected column not taking into
   * account any special characters.
   */
  private double naiveXToColumn(double x) {
    return x / getColumnWidth();
  }

  /**
   * Finds the adjusted column number due to tab indentation and carriage
   * returns. This is used in the simple case to handle prefixing tabs and the
   * '\r\n' windows line format. Complex cases are handled in the
   * {@link ColumnOffsetCache}.
   */
  private double simpleConvertColumnToX(Line line, int column) {
    // early out when we are at the start of the line
    if (column == 0) {
      return 0;
    }

    LineDimensionsUtils.markTimeline(getClass(), "Calculating simple offset");
    // get any indentation tabs that are affecting us; each tab already counts
    // as one column, so it only contributes (tabWidth - 1) extra columns
    int offsetTabColumns = LineDimensionsUtils.getLastIndentationTabCount(line.getText(), column)
        * (LineDimensionsUtils.getTabWidth() - 1);
    int offsetCarriageReturn = 0;
    if (isColumnAffectedByCarriageReturn(line, column)) {
      // the carriage return is zero-width, so give its column back
      offsetCarriageReturn = -1;
    }
    LineDimensionsUtils.markTimeline(getClass(), "End calculating simple offset");

    return naiveColumnToX(offsetTabColumns + offsetCarriageReturn + column);
  }

  /**
   * Converts x to a column for lines that only need the simple handling of
   * prefix tabs and a suffix carriage return.
   */
  private int simpleConvertXToColumn(Line line, double x, RoundingStrategy roundingStrategy) {
    if (x == 0) {
      return 0;
    }

    LineDimensionsUtils.markTimeline(getClass(), "Calculating simple offset from x");
    /*
     * we just have to be conscious here of prefix tabs which may be a different
     * width and suffix \r which is 0 width. We deal accordingly.
     */
    /*
     * we divide x by the width of a tab in pixels to overshoot the number of
     * indentation tabs
     */
    int columnIfAllTabs =
        (int) Math.floor(x / naiveColumnToX(LineDimensionsUtils.getTabWidth()));
    int offsetTabColumns =
        LineDimensionsUtils.getLastIndentationTabCount(line.getText(), columnIfAllTabs);
    assert columnIfAllTabs >= offsetTabColumns : "You appear to be less tabs then you say you are";

    double lineWidthPxWithoutTabs =
        x - (offsetTabColumns * LineDimensionsUtils.getTabWidth() * getColumnWidth());
    int column = roundingStrategy.apply(naiveXToColumn(lineWidthPxWithoutTabs) + offsetTabColumns);

    // if we landed on the carriage return column++
    if (column < line.length() && line.getText().charAt(column) == '\r') {
      column++;
    }
    LineDimensionsUtils.markTimeline(getClass(), "End calculating simple offset from x");

    return column;
  }

  /**
   * @return true if a measurement was performed.
   */
  private boolean checkColumnInCacheAndMeasureIfNeeded(
      ColumnOffsetCache cache, Line line, int column) {
    if (cache.isColumnMeasurementNeeded(column)) {
      measureLineStoppingAtColumn(cache, line, column);
      return true;
    }
    return false;
  }

  /**
   * @return true if a measurement was performed.
   */
  private boolean checkXInCacheAndMeasureIfNeeded(ColumnOffsetCache cache, Line line, double x) {
    if (cache.isXMeasurementNeeded(x)) {
      measureLineStoppingAtX(cache, line, x);
      return true;
    }
    return false;
  }

  /**
   * Builds the cache for a line up to or beyond the given endColumn value.
   *
   * @see #measureLine(ColumnOffsetCache, Line, int, double)
   */
  private void measureLineStoppingAtColumn(ColumnOffsetCache cache, Line line, int endColumn) {
    measureLine(cache, line, endColumn, Double.MAX_VALUE);
  }

  /**
   * Builds the cache for a line up to or beyond the given endX value.
   *
   * @see #measureLine(ColumnOffsetCache, Line, int, double)
   */
  private void measureLineStoppingAtX(ColumnOffsetCache cache, Line line, double endX) {
    measureLine(cache, line, Integer.MAX_VALUE, endX);
  }

  /**
   * Builds the cache for a line up to a particular column. Should not be called
   * if the line has already been {@link ColumnOffsetCache#FULLY_MEASURED}.
   *
   * <p>
   * You should only rely on either endColumn or endX, one or the other should
   * be the max value for its data type.
   *
   * @see #measureLineStoppingAtColumn(ColumnOffsetCache, Line, int)
   * @see #measureLineStoppingAtX(ColumnOffsetCache, Line, double)
   *
   * @param endColumn inclusive end column (we will end on or after end)
   * @param endX inclusive end x pixel width (we will end on or after endX)
   */
  private void measureLine(ColumnOffsetCache cache, Line line, int endColumn, double endX) {
    /*
     * Starting at cache.measuredColumn we will use the regex to scan forward to
     * see if we hit an interesting character other than prefixed tab. if we do
     * we'll measure that to that point and append a {@link ColumnOffset} if it
     * is a special size. Rinse and repeat.
     */
    LineDimensionsUtils.markTimeline(getClass(), "Beginning measure line");
    RegExp regexp = UnicodeUtils.regexpNonAsciiTabOrCarriageReturn;
    // The regexp is shared, so its scan position must be reset on every call.
    regexp.setLastIndex(cache.measuredOffset.column);
    MatchResult result = regexp.exec(line.getText());

    if (result != null) {
      double x = 0;
      do {
        // Calculate any x offset up to this point in the line
        ColumnOffset offset = cache.getLastColumnOffsetInCache();
        double baseXOffset = smartColumnToX(offset, result.getIndex());

        /*
         * TODO: we can be smarter here, if i > 1, then this character
         * is a mark. We could separate out the RegExp into non-spacing,
         * enclosing-marks v. spacing-marks and already know which are supposed
         * to be zero-width based on which groups are null.
         */
        String match = result.getGroup(0);
        for (int i = 0; i < match.length(); i++) {
          x = addOffsetForResult(cache, match.charAt(i), result.getIndex() + i, line, baseXOffset);
          baseXOffset = x;
        }
        result = regexp.exec(line.getText());
        // we have to ensure we measure through the last zero-width character.
      } while (result != null && result.getIndex() < endColumn && x < endX);
    }

    if (result == null) {
      // No further interesting characters exist on this line.
      cache.measuredOffset = ColumnOffsetCache.FULLY_MEASURED;
    }
    // Always emitted so every "Beginning measure line" mark has a matching end.
    LineDimensionsUtils.markTimeline(getClass(), "Ending measure line");
  }

  /**
   * Works out the x position of the right edge of the matched character and,
   * when the character is not exactly one column wide, appends an offset for
   * the following column to the cache.
   *
   * @param cache the cache receiving any new offset
   * @param matchedCharacter the character of interest
   * @param index the column of the matched character within the line
   * @param line the line being measured
   * @param baseXOffset the x position of the left edge of the matched character
   * @return the x position of the right edge of the matched character
   */
  private double addOffsetForResult(
      ColumnOffsetCache cache, char matchedCharacter, int index, Line line, double baseXOffset) {
    /*
     * Get the width of the string including our special character and if needed
     * append an offset to the cache.
     */
    double expectedWidth = baseXOffset + getColumnWidth();
    double stringWidth = getStringWidth(matchedCharacter, baseXOffset, line, index);

    if (stringWidth < baseXOffset) {
      /*
       * This is an annoying condition where certain combining characters can
       * actually change how the previous character is rendered. In some cases
       * actually making it smaller than before. This is fairly annoying. It
       * only happens with some scripts and languages like Arabic with heavy
       * combining marks. This is also possible due to measurement
       * inconsistencies when measuring combining characters.
       *
       * Honestly there's not much we can do, but we make our best attempt to at
       * least provide a consistent cursor experience even if it isn't
       * navigating the characters correctly (not that I would even know,
       * considering I can't speak/read Arabic).
       */
      stringWidth = baseXOffset;
    }

    if (stringWidth != expectedWidth) {
      cache.appendOffset(index + 1, stringWidth, stringWidth - baseXOffset);
    }

    return stringWidth;
  }

  /**
   * Returns the width of a column within the current zoom level.
   */
  private double getColumnWidth() {
    return measurementProvider.getCharacterWidth();
  }

  /**
   * Determines the width of the line up to and including a character of
   * interest, using either the cached width of that character or by measuring
   * the text with the {@link MeasurementProvider}.
   *
   * <p>
   * The text to measure is only built on a cache miss; tabs, carriage returns
   * and already cached characters never need it.
   *
   * @param characterOfInterest The character we are interested in, located at
   *        {@code index} in the line.
   * @param baseXOffset The base x offset of the column before the character of
   *        interest. The returned result will be this offset + the width of the
   *        characterOfInterest.
   * @param line The line containing the character of interest.
   * @param index The column of the character of interest within the line.
   *
   * @return The width of the string which is baseXOffset +
   *         characterOfInterestWidth
   */
  private double getStringWidth(
      char characterOfInterest, double baseXOffset, Line line, int index) {
    switch (characterOfInterest) {
      case '\t':
        // base + columnWidth * tab_size_in_columns
        return baseXOffset + LineDimensionsUtils.getTabWidth() * getColumnWidth();
      case '\r':
        // zero-width just return the baseXOffset
        return baseXOffset;
      default:
        String cacheKey = String.valueOf(characterOfInterest);
        Double characterWidth = characterWidthCache.get(cacheKey);
        // if we know the width already return it
        if (characterWidth != null) {
          return baseXOffset + characterWidth;
        }

        // Measure and store the width of the character
        double width = measurementProvider.measureStringWidth(getTextThroughColumn(line, index));
        // cache the width of this character
        characterWidthCache.put(cacheKey, width - baseXOffset);
        return width;
    }
  }

  /**
   * Returns the line's text up to and including the given column, special
   * casing tabs since they must render as the correct number of spaces (we
   * replace them with the appropriate number of hard-spaces so the browser
   * doesn't trim them).
   */
  private static String getTextThroughColumn(Line line, int index) {
    return line.getText().substring(0, index + 1).replace(
        "\t", StringUtils.repeatString("\u00A0", LineDimensionsUtils.getTabWidth()));
  }

  /**
   * Returns true if the second-to-last character of the line is a carriage
   * return and the column lies past it.
   */
  private static boolean isColumnAffectedByCarriageReturn(Line line, int column) {
    return line.length() >= 2 && column > line.length() - 2
        && line.getText().charAt(line.length() - 2) == '\r';
  }

  /**
   * Due to differences in how characters measure at different zoom levels (it's
   * not a constant factor for all character types!!!), we just clear the world
   * and rebuild.
   */
  private static void clearCharacterCacheDueToZoomChange() {
    LineDimensionsUtils.markTimeline(LineDimensionsCalculator.class, "Cleared cache due to zoom");
    characterWidthCache = JsonCollections.createMap();
  }
}