/* * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved * * The original version of this source code and documentation is * copyrighted and owned by IBM. These materials are provided * under terms of a License Agreement between IBM and Sun. * This technology is protected by multiple US and International * patents. This notice and attribution to IBM may not be removed. */ package java.text; import java.awt.Toolkit; import java.awt.font.TextAttribute; import java.awt.font.NumericShaper; import sun.text.CodePointIterator; /** {@collect.stats} * This class implements the Unicode Bidirectional Algorithm. * <p> * A Bidi object provides information on the bidirectional reordering of the text * used to create it. This is required, for example, to properly display Arabic * or Hebrew text. These languages are inherently mixed directional, as they order * numbers from left-to-right while ordering most other text from right-to-left. * <p> * Once created, a Bidi object can be queried to see if the text it represents is * all left-to-right or all right-to-left. Such objects are very lightweight and * this text is relatively easy to process. * <p> * If there are multiple runs of text, information about the runs can be accessed * by indexing to get the start, limit, and level of a run. The level represents * both the direction and the 'nesting level' of a directional run. Odd levels * are right-to-left, while even levels are left-to-right. So for example level * 0 represents left-to-right text, while level 1 represents right-to-left text, and * level 2 represents left-to-right text embedded in a right-to-left run. * * @since 1.4 */ public final class Bidi { byte dir; byte baselevel; int length; int[] runs; int[] cws; static { sun.font.FontManagerNativeLibrary.load(); } /** {@collect.stats} Constant indicating base direction is left-to-right. */ public static final int DIRECTION_LEFT_TO_RIGHT = 0; /** {@collect.stats} Constant indicating base direction is right-to-left. */ public static final int DIRECTION_RIGHT_TO_LEFT = 1; /** {@collect.stats} * Constant indicating that the base direction depends on the first strong * directional character in the text according to the Unicode * Bidirectional Algorithm. If no strong directional character is present, * the base direction is left-to-right. */ public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2; /** {@collect.stats} * Constant indicating that the base direction depends on the first strong * directional character in the text according to the Unicode * Bidirectional Algorithm. If no strong directional character is present, * the base direction is right-to-left. */ public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1; private static final int DIR_MIXED = 2; /** {@collect.stats} * Create Bidi from the given paragraph of text and base direction. * @param paragraph a paragraph of text * @param flags a collection of flags that control the algorithm. The * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT, * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT. * Other values are reserved. */ public Bidi(String paragraph, int flags) { if (paragraph == null) { throw new IllegalArgumentException("paragraph is null"); } nativeBidiChars(this, paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags); } /** {@collect.stats} * Create Bidi from the given paragraph of text. * <p> * The RUN_DIRECTION attribute in the text, if present, determines the base * direction (left-to-right or right-to-left). If not present, the base * direction is computes using the Unicode Bidirectional Algorithm, defaulting to left-to-right * if there are no strong directional characters in the text. This attribute, if * present, must be applied to all the text in the paragraph. * <p> * The BIDI_EMBEDDING attribute in the text, if present, represents embedding level * information. Negative values from -1 to -62 indicate overrides at the absolute value * of the level. Positive values from 1 to 62 indicate embeddings. Where values are * zero or not defined, the base embedding level as determined by the base direction * is assumed. * <p> * The NUMERIC_SHAPING attribute in the text, if present, converts European digits to * other decimal digits before running the bidi algorithm. This attribute, if present, * must be applied to all the text in the paragraph. * * @param paragraph a paragraph of text with optional character and paragraph attribute information * * @see java.awt.font.TextAttribute#BIDI_EMBEDDING * @see java.awt.font.TextAttribute#NUMERIC_SHAPING * @see java.awt.font.TextAttribute#RUN_DIRECTION */ public Bidi(AttributedCharacterIterator paragraph) { if (paragraph == null) { throw new IllegalArgumentException("paragraph is null"); } int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT; byte[] embeddings = null; int start = paragraph.getBeginIndex(); int limit = paragraph.getEndIndex(); int length = limit - start; int n = 0; char[] text = new char[length]; for (char c = paragraph.first(); c != paragraph.DONE; c = paragraph.next()) { text[n++] = c; } paragraph.first(); try { Boolean runDirection = (Boolean)paragraph.getAttribute(TextAttribute.RUN_DIRECTION); if (runDirection != null) { if (TextAttribute.RUN_DIRECTION_LTR.equals(runDirection)) { flags = DIRECTION_LEFT_TO_RIGHT; // clears default setting } else { flags = DIRECTION_RIGHT_TO_LEFT; } } } catch (ClassCastException e) { } try { NumericShaper shaper = (NumericShaper)paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING); if (shaper != null) { shaper.shape(text, 0, text.length); } } catch (ClassCastException e) { } int pos = start; do { paragraph.setIndex(pos); Object embeddingLevel = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING); int newpos = paragraph.getRunLimit(TextAttribute.BIDI_EMBEDDING); if (embeddingLevel != null) { try { int intLevel = ((Integer)embeddingLevel).intValue(); if (intLevel >= -61 && intLevel < 61) { byte level = (byte)(intLevel < 0 ? (-intLevel | 0x80) : intLevel); if (embeddings == null) { embeddings = new byte[length]; } for (int i = pos - start; i < newpos - start; ++i) { embeddings[i] = level; } } } catch (ClassCastException e) { } } pos = newpos; } while (pos < limit); nativeBidiChars(this, text, 0, embeddings, 0, text.length, flags); } /** {@collect.stats} * Create Bidi from the given text, embedding, and direction information. * The embeddings array may be null. If present, the values represent embedding level * information. Negative values from -1 to -61 indicate overrides at the absolute value * of the level. Positive values from 1 to 61 indicate embeddings. Where values are * zero, the base embedding level as determined by the base direction is assumed. * @param text an array containing the paragraph of text to process. * @param textStart the index into the text array of the start of the paragraph. * @param embeddings an array containing embedding values for each character in the paragraph. * This can be null, in which case it is assumed that there is no external embedding information. * @param embStart the index into the embedding array of the start of the paragraph. * @param paragraphLength the length of the paragraph in the text and embeddings arrays. * @param flags a collection of flags that control the algorithm. The * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT, * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT. * Other values are reserved. */ public Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags) { if (text == null) { throw new IllegalArgumentException("text is null"); } if (paragraphLength < 0) { throw new IllegalArgumentException("bad length: " + paragraphLength); } if (textStart < 0 || paragraphLength > text.length - textStart) { throw new IllegalArgumentException("bad range: " + textStart + " length: " + paragraphLength + " for text of length: " + text.length); } if (embeddings != null && (embStart < 0 || paragraphLength > embeddings.length - embStart)) { throw new IllegalArgumentException("bad range: " + embStart + " length: " + paragraphLength + " for embeddings of length: " + text.length); } if (embeddings != null) { // native uses high bit to indicate override, not negative value, sigh for (int i = embStart, embLimit = embStart + paragraphLength; i < embLimit; ++i) { if (embeddings[i] < 0) { byte[] temp = new byte[paragraphLength]; System.arraycopy(embeddings, embStart, temp, 0, paragraphLength); for (i -= embStart; i < paragraphLength; ++i) { if (temp[i] < 0) { temp[i] = (byte)(-temp[i] | 0x80); } } embeddings = temp; embStart = 0; break; } } } nativeBidiChars(this, text, textStart, embeddings, embStart, paragraphLength, flags); } /** {@collect.stats} * Private constructor used by line bidi. */ private Bidi(int dir, int baseLevel, int length, int[] data, int[] cws) { reset(dir, baseLevel, length, data, cws); } /** {@collect.stats} * Private mutator used by native code. */ private void reset(int dir, int baselevel, int length, int[] data, int[] cws) { this.dir = (byte)dir; this.baselevel = (byte)baselevel; this.length = length; this.runs = data; this.cws = cws; } /** {@collect.stats} * Create a Bidi object representing the bidi information on a line of text within * the paragraph represented by the current Bidi. This call is not required if the * entire paragraph fits on one line. * @param lineStart the offset from the start of the paragraph to the start of the line. * @param lineLimit the offset from the start of the paragraph to the limit of the line. */ public Bidi createLineBidi(int lineStart, int lineLimit) { if (lineStart == 0 && lineLimit == length) { return this; } int lineLength = lineLimit - lineStart; if (lineStart < 0 || lineLimit < lineStart || lineLimit > length) { throw new IllegalArgumentException("range " + lineStart + " to " + lineLimit + " is invalid for paragraph of length " + length); } if (runs == null) { return new Bidi(dir, baselevel, lineLength, null, null); } else { int cwspos = -1; int[] ncws = null; if (cws != null) { int cwss = 0; int cwsl = cws.length; while (cwss < cwsl) { if (cws[cwss] >= lineStart) { cwsl = cwss; while (cwsl < cws.length && cws[cwsl] < lineLimit) { cwsl++; } int ll = lineLimit-1; while (cwsl > cwss && cws[cwsl-1] == ll) { cwspos = ll; // record start of counter-directional whitespace --cwsl; --ll; } if (cwspos == lineStart) { // entire line is cws, so ignore return new Bidi(dir, baselevel, lineLength, null, null); } int ncwslen = cwsl - cwss; if (ncwslen > 0) { ncws = new int[ncwslen]; for (int i = 0; i < ncwslen; ++i) { ncws[i] = cws[cwss+i] - lineStart; } } break; } ++cwss; } } int[] nruns = null; int nlevel = baselevel; int limit = cwspos == -1 ? lineLimit : cwspos; int rs = 0; int rl = runs.length; int ndir = dir; for (; rs < runs.length; rs += 2) { if (runs[rs] > lineStart) { rl = rs; while (rl < runs.length && runs[rl] < limit) { rl += 2; } if ((rl > rs) || (runs[rs+1] != baselevel)) { rl += 2; if (cwspos != -1 && rl > rs && runs[rl-1] != baselevel) { // add level for cws nruns = new int[rl - rs + 2]; nruns[rl - rs] = lineLength; nruns[rl - rs + 1] = baselevel; } else { limit = lineLimit; nruns = new int[rl - rs]; } int n = 0; for (int i = rs; i < rl; i += 2) { nruns[n++] = runs[i] - lineStart; nruns[n++] = runs[i+1]; } nruns[n-2] = limit - lineStart; } else { ndir = (runs[rs+1] & 0x1) == 0 ? DIRECTION_LEFT_TO_RIGHT : DIRECTION_RIGHT_TO_LEFT; } break; } } return new Bidi(ndir, baselevel, lineLength, nruns, ncws); } } /** {@collect.stats} * Return true if the line is not left-to-right or right-to-left. This means it either has mixed runs of left-to-right * and right-to-left text, or the base direction differs from the direction of the only run of text. * @return true if the line is not left-to-right or right-to-left. */ public boolean isMixed() { return dir == DIR_MIXED; } /** {@collect.stats} * Return true if the line is all left-to-right text and the base direction is left-to-right. * @return true if the line is all left-to-right text and the base direction is left-to-right */ public boolean isLeftToRight() { return dir == DIRECTION_LEFT_TO_RIGHT; } /** {@collect.stats} * Return true if the line is all right-to-left text, and the base direction is right-to-left. * @return true if the line is all right-to-left text, and the base direction is right-to-left */ public boolean isRightToLeft() { return dir == DIRECTION_RIGHT_TO_LEFT; } /** {@collect.stats} * Return the length of text in the line. * @return the length of text in the line */ public int getLength() { return length; } /** {@collect.stats} * Return true if the base direction is left-to-right. * @return true if the base direction is left-to-right */ public boolean baseIsLeftToRight() { return (baselevel & 0x1) == 0; } /** {@collect.stats} * Return the base level (0 if left-to-right, 1 if right-to-left). * @return the base level */ public int getBaseLevel() { return baselevel; } /** {@collect.stats} * Return the resolved level of the character at offset. If offset is <0 or >= * the length of the line, return the base direction level. * @param offset the index of the character for which to return the level * @return the resolved level of the character at offset */ public int getLevelAt(int offset) { if (runs == null || offset < 0 || offset >= length) { return baselevel; } else { int i = 0; do { if (offset < runs[i]) { return runs[i+1]; } i += 2; } while (true); } } /** {@collect.stats} * Return the number of level runs. * @return the number of level runs */ public int getRunCount() { return runs == null ? 1 : runs.length / 2; } /** {@collect.stats} * Return the level of the nth logical run in this line. * @param run the index of the run, between 0 and <code>getRunCount()</code> * @return the level of the run */ public int getRunLevel(int run) { return runs == null ? baselevel : runs[run * 2 + 1]; } /** {@collect.stats} * Return the index of the character at the start of the nth logical run in this line, as * an offset from the start of the line. * @param run the index of the run, between 0 and <code>getRunCount()</code> * @return the start of the run */ public int getRunStart(int run) { return (runs == null || run == 0) ? 0 : runs[run * 2 - 2]; } /** {@collect.stats} * Return the index of the character past the end of the nth logical run in this line, as * an offset from the start of the line. For example, this will return the length * of the line for the last run on the line. * @param run the index of the run, between 0 and <code>getRunCount()</code> * @return limit the limit of the run */ public int getRunLimit(int run) { return runs == null ? length : runs[run * 2]; } /** {@collect.stats} * Return true if the specified text requires bidi analysis. If this returns false, * the text will display left-to-right. Clients can then avoid constructing a Bidi object. * Text in the Arabic Presentation Forms area of Unicode is presumed to already be shaped * and ordered for display, and so will not cause this function to return true. * * @param text the text containing the characters to test * @param start the start of the range of characters to test * @param limit the limit of the range of characters to test * @return true if the range of characters requires bidi analysis */ public static boolean requiresBidi(char[] text, int start, int limit) { CodePointIterator cpi = CodePointIterator.create(text, start, limit); for (int cp = cpi.next(); cp != CodePointIterator.DONE; cp = cpi.next()) { if (cp > 0x0590) { int dc = nativeGetDirectionCode(cp); if ((RMASK & (1 << dc)) != 0) { return true; } } } return false; } /** {@collect.stats} * Reorder the objects in the array into visual order based on their levels. * This is a utility function to use when you have a collection of objects * representing runs of text in logical order, each run containing text * at a single level. The elements at <code>index</code> from * <code>objectStart</code> up to <code>objectStart + count</code> * in the objects array will be reordered into visual order assuming * each run of text has the level indicated by the corresponding element * in the levels array (at <code>index - objectStart + levelStart</code>). * * @param levels an array representing the bidi level of each object * @param levelStart the start position in the levels array * @param objects the array of objects to be reordered into visual order * @param objectStart the start position in the objects array * @param count the number of objects to reorder */ public static void reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count) { if (count < 0) { throw new IllegalArgumentException("count " + count + " must be >= 0"); } if (levelStart < 0 || levelStart + count > levels.length) { throw new IllegalArgumentException("levelStart " + levelStart + " and count " + count + " out of range [0, " + levels.length + "]"); } if (objectStart < 0 || objectStart + count > objects.length) { throw new IllegalArgumentException("objectStart " + objectStart + " and count " + count + " out of range [0, " + objects.length + "]"); } byte lowestOddLevel = (byte)(NUMLEVELS + 1); byte highestLevel = 0; // initialize mapping and levels int levelLimit = levelStart + count; for (int i = levelStart; i < levelLimit; i++) { byte level = levels[i]; if (level > highestLevel) { highestLevel = level; } if ((level & 0x01) != 0 && level < lowestOddLevel) { lowestOddLevel = level; } } int delta = objectStart - levelStart; while (highestLevel >= lowestOddLevel) { int i = levelStart; for (;;) { while (i < levelLimit && levels[i] < highestLevel) { i++; } int begin = i++; if (begin == levelLimit) { break; // no more runs at this level } while (i < levelLimit && levels[i] >= highestLevel) { i++; } int end = i - 1; begin += delta; end += delta; while (begin < end) { Object temp = objects[begin]; objects[begin] = objects[end]; objects[end] = temp; ++begin; --end; } } --highestLevel; } } private static final char NUMLEVELS = 62; private static final int RMASK = (1 << 1 /* U_RIGHT_TO_LEFT */) | (1 << 5 /* U_ARABIC_NUMBER */) | (1 << 13 /* U_RIGHT_TO_LEFT_ARABIC */) | (1 << 14 /* U_RIGHT_TO_LEFT_EMBEDDING */) | (1 << 15 /* U_RIGHT_TO_LEFT_OVERRIDE */); /** {@collect.stats} Access native bidi implementation. */ private static native int nativeGetDirectionCode(int cp); /** {@collect.stats} Access native bidi implementation. */ private static synchronized native void nativeBidiChars(Bidi bidi, char[] text, int textStart, byte[] embeddings, int embeddingStart, int length, int flags); /** {@collect.stats} * Display the bidi internal state, used in debugging. */ public String toString() { StringBuffer buf = new StringBuffer(super.toString()); buf.append("[dir: " + dir); buf.append(" baselevel: " + baselevel); buf.append(" length: " + length); if (runs == null) { buf.append(" runs: null"); } else { buf.append(" runs: ["); for (int i = 0; i < runs.length; i += 2) { if (i != 0) { buf.append(' '); } buf.append(runs[i]); // limit buf.append('/'); buf.append(runs[i+1]); // level } buf.append(']'); } if (cws == null) { buf.append(" cws: null"); } else { buf.append(" cws: ["); for (int i = 0; i < cws.length; ++i) { if (i != 0) { buf.append(' '); } buf.append(Integer.toHexString(cws[i])); } buf.append(']'); } buf.append(']'); return buf.toString(); } }