/******************************************************************************* * Copyright (c) 2010, 2011 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation ******************************************************************************/ package org.eclipse.equinox.bidi.internal; import org.eclipse.equinox.bidi.advanced.*; import org.eclipse.equinox.bidi.custom.*; /** * Implementation for IStructuredTextExpert. */ public class StructuredTextImpl implements IStructuredTextExpert { static final String EMPTY_STRING = ""; //$NON-NLS-1$ // In the following lines, B, L, R and AL represent bidi categories // as defined in the Unicode Bidirectional Algorithm // ( http://www.unicode.org/reports/tr9/ ). // B represents the category Block Separator. // L represents the category Left to Right character. // R represents the category Right to Left character. // AL represents the category Arabic Letter. // AN represents the category Arabic Number. // EN represents the category European Number. static final byte B = Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR; static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT; static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT; static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; static final char LRM = 0x200E; static final char RLM = 0x200F; static final char LRE = 0x202A; static final char RLE = 0x202B; static final char PDF = 0x202C; static final char[] MARKS = {LRM, RLM}; static final char[] EMBEDS = {LRE, RLE}; static final int PREFIX_LENGTH = 2; static final int SUFFIX_LENGTH = 2; static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH; static final int[] EMPTY_INT_ARRAY = new int[0]; /** * The structured text handler utilized by this expert. */ protected final StructuredTextTypeHandler handler; /** * The environment associated with the expert. */ protected final StructuredTextEnvironment environment; /** * Flag which is true if the expert is stateful. */ protected final boolean sharedExpert; /** * Last state value set by {@link #setState} or {@link #clearState}. */ protected Object state; /** * Constructor used in {@link StructuredTextExpertFactory}. * * @param structuredTextHandler the structured text handler used by this expert. * @param environment the environment associated with this expert. * @param shared flag which is true if the expert is stateful. */ public StructuredTextImpl(StructuredTextTypeHandler structuredTextHandler, StructuredTextEnvironment environment, boolean shared) { this.handler = structuredTextHandler; this.environment = environment; sharedExpert = shared; } public StructuredTextTypeHandler getTypeHandler() { return handler; } public StructuredTextEnvironment getEnvironment() { return environment; } public int getTextDirection(String text) { return handler.getDirection(this, text); } public void clearState() { if (sharedExpert) state = null; } public void setState(Object newState) { if (sharedExpert) state = newState; } public Object getState() { return state; } long computeNextLocation(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int[] locations, int curPos) { String separators = handler.getSeparators(this); int separCount = separators.length(); int specialsCount = handler.getSpecialsCount(this); int len = text.length(); int nextLocation = len; int idxLocation = 0; // Start with special sequences to give them precedence over simple // separators. This may apply to cases like slash+asterisk versus slash. for (int i = 0; i < specialsCount; i++) { int location = locations[separCount + i]; if (location < curPos) { location = handler.indexOfSpecial(this, text, charTypes, offsets, i + 1, curPos); if (location < 0) location = len; locations[separCount + i] = location; } if (location < nextLocation) { nextLocation = location; idxLocation = separCount + i; } } for (int i = 0; i < separCount; i++) { int location = locations[i]; if (location < curPos) { location = text.indexOf(separators.charAt(i), curPos); if (location < 0) location = len; locations[i] = location; } if (location < nextLocation) { nextLocation = location; idxLocation = i; } } return nextLocation + (((long) idxLocation) << 32); } /** * @see StructuredTextTypeHandler#processSeparator StructuredTextTypeHandler.processSeparator */ static public void processSeparator(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int separLocation) { int len = text.length(); int direction = charTypes.getDirection(); if (direction == DIR_RTL) { // the structured text base direction is RTL for (int i = separLocation - 1; i >= 0; i--) { byte charType = charTypes.getBidiTypeAt(i); if (charType == R || charType == AL) return; if (charType == L) { for (int j = separLocation; j < len; j++) { charType = charTypes.getBidiTypeAt(j); if (charType == R || charType == AL) return; if (charType == L || charType == EN) { offsets.insertOffset(charTypes, separLocation); return; } } return; } } return; } // the structured text base direction is LTR boolean doneAN = false; for (int i = separLocation - 1; i >= 0; i--) { byte charType = charTypes.getBidiTypeAt(i); if (charType == L) return; if (charType == R || charType == AL) { for (int j = separLocation; j < len; j++) { charType = charTypes.getBidiTypeAt(j); if (charType == L) return; if (charType == R || charType == EN || charType == AL || charType == AN) { offsets.insertOffset(charTypes, separLocation); return; } } return; } if (charType == AN && !doneAN) { for (int j = separLocation; j < len; j++) { charType = charTypes.getBidiTypeAt(j); if (charType == L) return; if (charType == AL || charType == AN || charType == R) { offsets.insertOffset(charTypes, separLocation); return; } } doneAN = true; } } } /** * When the orientation is <code>ORIENT_LTR</code> and the * structured text has a RTL base direction, * {@link IStructuredTextExpert#leanToFullText leanToFullText} * adds RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its * end. * <p> * When the orientation is <code>ORIENT_RTL</code> and the * structured text has a LTR base direction, * {@link IStructuredTextExpert#leanToFullText leanToFullText} * adds LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its * end. * <p> * When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve * to a RTL orientation while the structured text has a LTR base * direction, {@link IStructuredTextExpert#leanToFullText leanToFullText} * adds LRM at the head of the <i>full</i> text. * <p> * When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve * to a LTR orientation while the structured text has a RTL base * direction, {@link IStructuredTextExpert#leanToFullText leanToFullText} * adds RLM at the head of the <i>full</i> text. * <p> * When the orientation is <code>ORIENT_UNKNOWN</code> and the * structured text has a LTR base direction, * {@link IStructuredTextExpert#leanToFullText leanToFullText} * adds LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its * end. * <p> * When the orientation is <code>ORIENT_UNKNOWN</code> and the * structured text has a RTL base direction, * {@link IStructuredTextExpert#leanToFullText leanToFullText} * adds RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its * end. * <p> * When the orientation is <code>ORIENT_IGNORE</code>, * {@link IStructuredTextExpert#leanToFullText leanToFullText} does not add any directional * formatting characters as either prefix or suffix of the <i>full</i> text. * <p> */ public String leanToFullText(String text) { int len = text.length(); if (len == 0) return text; StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); int prefixLength = offsets.getPrefixLength(); int direction = charTypes.getDirection(); return insertMarks(text, offsets.getOffsets(), direction, prefixLength); } public int[] leanToFullMap(String text) { int len = text.length(); if (len == 0) return EMPTY_INT_ARRAY; StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); int prefixLength = offsets.getPrefixLength(); int[] map = new int[len]; int count = offsets.getCount(); // number of used entries int added = prefixLength; for (int pos = 0, i = 0; pos < len; pos++) { if (i < count && pos == offsets.getOffset(i)) { added++; i++; } map[pos] = pos + added; } return map; } public int[] leanBidiCharOffsets(String text) { int len = text.length(); if (len == 0) return EMPTY_INT_ARRAY; StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); return offsets.getOffsets(); } private StructuredTextOffsets leanToFullCommon(String text, StructuredTextCharTypes charTypes) { int len = text.length(); int direction = handler.getDirection(this, text, charTypes); StructuredTextOffsets offsets = new StructuredTextOffsets(); if (!handler.skipProcessing(this, text, charTypes)) { // initialize locations int separCount = handler.getSeparators(this).length(); int[] locations = new int[separCount + handler.getSpecialsCount(this)]; for (int i = 0, k = locations.length; i < k; i++) { locations[i] = -1; } // current position int curPos = 0; if (state != null) { curPos = handler.processSpecial(this, text, charTypes, offsets, 0, -1); } while (true) { // location of next token to handle int nextLocation; // index of next token to handle (if < separCount, this is a separator; otherwise a special case int idxLocation; long res = computeNextLocation(text, charTypes, offsets, locations, curPos); nextLocation = (int) (res & 0x00000000FFFFFFFF); /* low word */ if (nextLocation >= len) break; idxLocation = (int) (res >> 32); /* high word */ if (idxLocation < separCount) { processSeparator(text, charTypes, offsets, nextLocation); curPos = nextLocation + 1; } else { idxLocation -= (separCount - 1); // because caseNumber starts from 1 curPos = handler.processSpecial(this, text, charTypes, offsets, idxLocation, nextLocation); } if (curPos >= len) break; } // end while } // end if (!handler.skipProcessing()) int prefixLength; int orientation = environment.getOrientation(); if (orientation == StructuredTextEnvironment.ORIENT_IGNORE) prefixLength = 0; else { int resolvedOrientation = charTypes.resolveOrientation(); if (orientation != StructuredTextEnvironment.ORIENT_UNKNOWN && resolvedOrientation == direction) prefixLength = 0; else if ((orientation & StructuredTextEnvironment.ORIENT_CONTEXTUAL) != 0) prefixLength = 1; else prefixLength = 2; } offsets.setPrefixLength(prefixLength); return offsets; } public String fullToLeanText(String full) { if (full.length() == 0) return full; int dir = handler.getDirection(this, full); char curMark = MARKS[dir]; char curEmbed = EMBEDS[dir]; int i; // used as loop index // remove any prefix and leading mark int lenFull = full.length(); for (i = 0; i < lenFull; i++) { char c = full.charAt(i); if (c != curEmbed && c != curMark) break; } if (i > 0) { // found at least one prefix or leading mark full = full.substring(i); lenFull = full.length(); } // remove any suffix and trailing mark for (i = lenFull - 1; i >= 0; i--) { char c = full.charAt(i); if (c != PDF && c != curMark) break; } if (i < 0) // only suffix and trailing marks, no real data return EMPTY_STRING; if (i < (lenFull - 1)) { // found at least one suffix or trailing mark full = full.substring(0, i + 1); lenFull = full.length(); } char[] chars = full.toCharArray(); // remove marks from chars int cnt = 0; for (i = 0; i < lenFull; i++) { char c = chars[i]; if (c == curMark) cnt++; else if (cnt > 0) chars[i - cnt] = c; } String lean = new String(chars, 0, lenFull - cnt); String full2 = leanToFullText(lean); // strip prefix and suffix int beginIndex = 0, endIndex = full2.length(); if (full2.charAt(0) == curMark) beginIndex = 1; else { if (full2.charAt(0) == curEmbed) { beginIndex = 1; if (full2.charAt(0) == curMark) beginIndex = 2; } if (full2.charAt(endIndex - 1) == PDF) { endIndex--; if (full2.charAt(endIndex - 1) == curMark) endIndex--; } } if (beginIndex > 0 || endIndex < full2.length()) full2 = full2.substring(beginIndex, endIndex); if (full2.equals(full)) return lean; // There are some marks in full which are not in full2 and/or vice versa. // We need to add to lean any mark appearing in full and not in full2. // The completed lean can never be longer than full itself. char[] newChars = new char[lenFull]; char cFull, cFull2; int idxFull, idxFull2, idxLean, newCharsPos; int lenFull2 = full2.length(); idxFull = idxFull2 = idxLean = newCharsPos = 0; while (idxFull < lenFull && idxFull2 < lenFull2) { cFull2 = full2.charAt(idxFull2); cFull = full.charAt(idxFull); if (cFull2 == cFull) { /* chars are equal, proceed */ if (cFull2 != curMark) newChars[newCharsPos++] = chars[idxLean++]; idxFull++; idxFull2++; continue; } if (cFull2 == curMark) { /* extra Mark in full2 text */ idxFull2++; continue; } if (cFull == curMark) { /* extra Mark in source full text */ idxFull++; // idxFull-2 always >= 0 since leading Marks were removed from full if (full.charAt(idxFull - 2) == curMark) continue; // ignore successive Marks in full after the first one newChars[newCharsPos++] = curMark; continue; } // we should never get here (extra char which is not a Mark) throw new IllegalStateException("Internal error: extra character not a Mark."); //$NON-NLS-1$ } if (idxFull < lenFull) /* full2 ended before full - this should never happen since we removed all marks and PDFs at the end of full */ throw new IllegalStateException("Internal error: unexpected EOL."); //$NON-NLS-1$ lean = new String(newChars, 0, newCharsPos); return lean; } public int[] fullToLeanMap(String full) { int lenFull = full.length(); if (lenFull == 0) return EMPTY_INT_ARRAY; String lean = fullToLeanText(full); int lenLean = lean.length(); int dir = handler.getDirection(this, lean); char curMark = MARKS[dir]; char curEmbed = EMBEDS[dir]; int[] map = new int[lenFull]; int idxFull, idxLean; // skip any prefix and leading mark for (idxFull = 0; idxFull < lenFull; idxFull++) { char c = full.charAt(idxFull); if (c != curEmbed && c != curMark) break; map[idxFull] = -1; } // lean must be a subset of Full, so we only check on iLean < leanLen for (idxLean = 0; idxLean < lenLean; idxFull++) { if (full.charAt(idxFull) == lean.charAt(idxLean)) { map[idxFull] = idxLean; idxLean++; } else map[idxFull] = -1; } for (; idxFull < lenFull; idxFull++) map[idxFull] = -1; return map; } public int[] fullBidiCharOffsets(String full) { int lenFull = full.length(); if (lenFull == 0) return EMPTY_INT_ARRAY; String lean = fullToLeanText(full); StructuredTextOffsets offsets = new StructuredTextOffsets(); int lenLean = lean.length(); int idxLean, idxFull; // lean must be a subset of Full, so we only check on iLean < leanLen for (idxLean = idxFull = 0; idxLean < lenLean; idxFull++) { if (full.charAt(idxFull) == lean.charAt(idxLean)) idxLean++; else offsets.insertOffset(null, idxFull); } for (; idxFull < lenFull; idxFull++) offsets.insertOffset(null, idxFull); return offsets.getOffsets(); } public String insertMarks(String text, int[] offsets, int direction, int affixLength) { if (direction != DIR_LTR && direction != DIR_RTL) throw new IllegalArgumentException("Invalid direction"); //$NON-NLS-1$ if (affixLength < 0 || affixLength > 2) throw new IllegalArgumentException("Invalid affix length"); //$NON-NLS-1$ int count = offsets == null ? 0 : offsets.length; if (count == 0 && affixLength == 0) return text; int textLength = text.length(); if (textLength == 0) return text; int newLen = textLength + count; if (affixLength == 1) newLen++; /* +1 for a mark char */ else if (affixLength == 2) newLen += FIXES_LENGTH; char[] fullChars = new char[newLen]; int added = affixLength; // add marks at offsets char curMark = MARKS[direction]; for (int i = 0, j = 0; i < textLength; i++) { char c = text.charAt(i); if (j < count && i == offsets[j]) { fullChars[i + added] = curMark; added++; j++; } fullChars[i + added] = c; } if (affixLength > 0) { /* add prefix/suffix ? */ if (affixLength == 1) { /* contextual orientation */ fullChars[0] = curMark; } else { // When the orientation is RTL, we need to add EMBED at the // start of the text and PDF at its end. // However, because of a bug in Windows' handling of LRE/RLE/PDF, // we add LRM or RLM (according to the direction) after the // LRE/RLE and again before the PDF. char curEmbed = EMBEDS[direction]; fullChars[0] = curEmbed; fullChars[1] = curMark; fullChars[newLen - 1] = PDF; fullChars[newLen - 2] = curMark; } } return new String(fullChars); } public String toString() { return super.toString() + " [handler=" + handler.toString() + "]"; //$NON-NLS-1$ //$NON-NLS-2$ } }