/* */ package com.ibm.icu.text;
/* */
/* */ import com.ibm.icu.impl.Assert;
/* */ import java.io.IOException;
/* */ import java.io.InputStream;
/* */ import java.text.CharacterIterator;
/* */ import java.util.Stack;
/* */
/* */
/* */
/* */
/* */
/* */
/* */
/* */
/* */
/* */
/* */
/**
 * Break iterator that extends {@link DictionaryBasedBreakIterator} and further
 * subdivides the ranges reported by the superclass into words, using a
 * {@link BreakCTDictionary} plus a few character-class heuristics for Thai.
 *
 * <p>The subdivision result is stored in the inherited {@code cachedBreakPositions}
 * array and served one position at a time by {@link #handleNext()}.
 */
class ThaiBreakIterator
    extends DictionaryBasedBreakIterator
{
    /** Number of {@link PossibleWord} slots cycled through while looking ahead. */
    private static final byte THAI_LOOKAHEAD = 3;

    /** A found word shorter than this may be merged with following non-dictionary text. */
    private static final byte THAI_ROOT_COMBINE_THRESHOLD = 3;

    /** Resynchronise when the longest dictionary prefix of the following text is shorter than this. */
    private static final byte THAI_PREFIX_COMBINE_THRESHOLD = 3;

    /** U+0E2F (decimal 3631), THAI CHARACTER PAIYANNOI. */
    private static final char THAI_PAIYANNOI = '\u0E2F';

    /** U+0E46 (decimal 3654), THAI CHARACTER MAIYAMOK. */
    private static final char THAI_MAIYAMOK = '\u0E46';

    /** Ranges shorter than this are not subdivided at all. */
    private static final byte THAI_MIN_WORD = 2;

    /** Thai characters with line-break class SA. Not read by the code in this class. */
    private static final UnicodeSet fThaiWordSet = new UnicodeSet();

    /** Thai SA combining marks plus U+0020; runs of these are attached to the preceding word. */
    private static final UnicodeSet fMarkSet = new UnicodeSet();

    /** Characters after which the resync scan is willing to try a new word. */
    private static final UnicodeSet fEndWordSet;

    /** Characters at which the resync scan is willing to start a new word. */
    private static final UnicodeSet fBeginWordSet = new UnicodeSet();

    /** Suffix characters (PAIYANNOI, MAIYAMOK) that may be appended to a word. */
    private static final UnicodeSet fSuffixSet = new UnicodeSet();

    static {
        fThaiWordSet.applyPattern("[[:Thai:]&[:LineBreak=SA:]]");
        fThaiWordSet.compact();

        fMarkSet.applyPattern("[[:Thai:]&[:LineBreak=SA:]&[:M:]]");
        fMarkSet.add(0x0020);

        // Work on a COPY of the word set. Assigning the reference instead would
        // make the removals below silently strip characters from fThaiWordSet too.
        fEndWordSet = new UnicodeSet(fThaiWordSet);
        fEndWordSet.remove(0x0E31);          // U+0E31 (decimal 3633)
        fEndWordSet.remove(0x0E40, 0x0E44);  // U+0E40..U+0E44 (decimal 3648..3652)

        fBeginWordSet.add(0x0E01, 0x0E2E);   // U+0E01..U+0E2E (decimal 3585..3630)
        fBeginWordSet.add(0x0E40, 0x0E44);   // U+0E40..U+0E44 (decimal 3648..3652)

        fSuffixSet.add(THAI_PAIYANNOI);
        fSuffixSet.add(THAI_MAIYAMOK);

        fMarkSet.compact();
        fEndWordSet.compact();
        fBeginWordSet.compact();
        fSuffixSet.compact();

        // All sets are fully populated at this point.
        fThaiWordSet.freeze();
        fMarkSet.freeze();
        fEndWordSet.freeze();
        fBeginWordSet.freeze();
        fSuffixSet.freeze();
    }

    /** Dictionary queried for candidate words; built once in the constructor. */
    private final BreakCTDictionary fDictionary;

    /**
     * Holds the dictionary matches that start at one text offset and lets the
     * caller step through them from the last entry towards the first.
     */
    static class PossibleWord
    {
        /** Capacity of the {@link #lengths} buffer handed to the dictionary. */
        private static final int POSSIBLE_WORD_LIST_MAX = 20;

        /** Candidate lengths filled in by the dictionary lookup. */
        private final int[] lengths;
        /** One-element out-parameter: number of valid entries in {@link #lengths}. */
        private final int[] count;
        /** Value returned by the last dictionary lookup (exposed by {@link #longestPrefix()}). */
        private int prefix;
        /** Text offset of the last lookup, or -1 before the first one. */
        private int offset;
        /** Index into {@link #lengths} of the candidate chosen by {@link #markCurrent()}. */
        private int mark;
        /** Index into {@link #lengths} of the candidate currently being tried. */
        private int current;

        public PossibleWord()
        {
            this.lengths = new int[POSSIBLE_WORD_LIST_MAX];
            this.count = new int[1];
            this.offset = -1;
        }

        /**
         * Looks up the words starting at the iterator's current index and
         * positions the iterator after the last candidate in the list.
         *
         * <p>The dictionary is only queried when the iterator index differs from
         * the offset of the previous lookup; otherwise the stored result is reused.
         *
         * @param fIter    text iterator, positioned at the start of the word
         * @param dict     dictionary to query
         * @param rangeEnd exclusive end of the range being divided
         * @return number of candidates found (the iterator stays at the start if none)
         */
        public int candidates(CharacterIterator fIter, BreakCTDictionary dict, int rangeEnd)
        {
            int start = fIter.getIndex();
            if (start != this.offset) {
                this.offset = start;
                // NOTE(review): presumably matches() fills lengths/count and returns the
                // longest matched prefix length -- confirm against BreakCTDictionary.
                this.prefix = dict.matches(fIter, rangeEnd - start, this.lengths, this.count, this.lengths.length);

                // The lookup may have moved the iterator; undo that when nothing matched.
                if (this.count[0] <= 0) {
                    fIter.setIndex(start);
                }
            }
            if (this.count[0] > 0) {
                fIter.setIndex(start + this.lengths[this.count[0] - 1]);
            }
            this.current = this.count[0] - 1;
            this.mark = this.current;
            return this.count[0];
        }

        /**
         * Moves the iterator to the end of the marked candidate.
         *
         * @param fIter text iterator to reposition
         * @return length of the marked candidate
         */
        public int acceptMarked(CharacterIterator fIter)
        {
            fIter.setIndex(this.offset + this.lengths[this.mark]);
            return this.lengths[this.mark];
        }

        /**
         * Steps to the previous candidate in the list and moves the iterator to its end.
         *
         * @param fIter text iterator to reposition
         * @return {@code true} if there was another candidate to step to
         */
        public boolean backUp(CharacterIterator fIter)
        {
            if (this.current > 0) {
                fIter.setIndex(this.offset + this.lengths[--this.current]);
                return true;
            }
            return false;
        }

        /** @return the value returned by the most recent dictionary lookup */
        public int longestPrefix()
        {
            return this.prefix;
        }

        /** Remembers the candidate currently being tried as the one to accept. */
        public void markCurrent()
        {
            this.mark = this.current;
        }
    }

    /**
     * @param ruleStream       stream passed to the superclass constructor
     * @param dictionaryStream stream from which the {@link BreakCTDictionary} is built
     * @throws IOException declared for the stream-consuming constructors called here
     */
    public ThaiBreakIterator(InputStream ruleStream, InputStream dictionaryStream) throws IOException
    {
        super(ruleStream);
        this.fDictionary = new BreakCTDictionary(dictionaryStream);
    }

    /**
     * Returns the next break position, subdividing dictionary ranges on demand.
     *
     * <p>When there is no cache, or the cache has been consumed, the superclass
     * is asked for the next boundary. If that range contained more than one
     * dictionary character and is longer than one character it is divided and
     * cached; otherwise the superclass result is returned unchanged.
     *
     * @return the next boundary position
     */
    protected int handleNext()
    {
        CharacterIterator text = getText();

        if ((this.cachedBreakPositions == null)
                || (this.positionInCache == this.cachedBreakPositions.length - 1)) {
            int startPos = text.getIndex();
            this.fDictionaryCharCount = 0;
            int result = super.handleNext();

            if ((this.fDictionaryCharCount > 1) && (result - startPos > 1)) {
                divideUpDictionaryRange(startPos, result);
            } else {
                // Nothing to subdivide: drop any stale cache and pass the result through.
                this.cachedBreakPositions = null;
                return result;
            }
        }

        if (this.cachedBreakPositions != null) {
            this.positionInCache += 1;
            text.setIndex(this.cachedBreakPositions[this.positionInCache]);
            return this.cachedBreakPositions[this.positionInCache];
        }

        // Not expected to be reached. NOTE(review): Assert.assrt(false) presumably
        // throws -- confirm against com.ibm.icu.impl.Assert.
        Assert.assrt(false);
        // Negative bail-out value so it can never be mistaken for a text offset
        // (the decompiled 55537 is -9999 read as an unsigned 16-bit constant).
        return -9999;
    }

    /**
     * Splits {@code [rangeStart, rangeEnd)} into words and stores the resulting
     * boundaries, bracketed by {@code rangeStart} and {@code rangeEnd}, in
     * {@code cachedBreakPositions}; {@code positionInCache} is reset to 0.
     *
     * @param rangeStart inclusive start of the range
     * @param rangeEnd   exclusive end of the range
     * @return number of words found, or 0 if the range is shorter than
     *         {@link #THAI_MIN_WORD} (in which case the cache is left untouched)
     */
    private int divideUpDictionaryRange(int rangeStart, int rangeEnd)
    {
        if (rangeEnd - rangeStart < THAI_MIN_WORD) {
            return 0;
        }
        CharacterIterator fIter = getText();
        int wordsFound = 0;

        Stack<Integer> foundBreaks = new Stack<Integer>();
        PossibleWord[] words = new PossibleWord[THAI_LOOKAHEAD];
        for (int i = 0; i < THAI_LOOKAHEAD; i++) {
            words[i] = new PossibleWord();
        }

        fIter.setIndex(rangeStart);
        int current;
        while ((current = fIter.getIndex()) < rangeEnd) {
            int wordLength = 0;

            // Look for candidate words at the current position.
            int candidates = words[wordsFound % THAI_LOOKAHEAD].candidates(fIter, this.fDictionary, rangeEnd);

            if (candidates == 1) {
                // Exactly one candidate: take it.
                wordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(fIter);
                wordsFound++;
            }
            else if (candidates > 1) {
                // Several candidates: prefer one that is followed by further dictionary words.
                boolean foundBest = false;

                if (fIter.getIndex() < rangeEnd) {
                    do {
                        if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(fIter, this.fDictionary, rangeEnd) > 0) {
                            // Followed by a second word: remember this first-word candidate.
                            words[wordsFound % THAI_LOOKAHEAD].markCurrent();

                            // The second word already reaches the end of the range.
                            if (fIter.getIndex() >= rangeEnd) {
                                break;
                            }

                            // See whether any second-word candidate is followed by a third word.
                            do {
                                if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(fIter, this.fDictionary, rangeEnd) > 0) {
                                    words[wordsFound % THAI_LOOKAHEAD].markCurrent();
                                    foundBest = true;
                                    break;
                                }
                            } while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(fIter));
                        }
                    } while ((words[wordsFound % THAI_LOOKAHEAD].backUp(fIter)) && (!foundBest));
                }
                wordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(fIter);
                wordsFound++;
            }

            // The iterator is now at the end of the word found (if any). If that word is
            // short and no dictionary word follows, absorb the following text into it.
            if ((fIter.getIndex() < rangeEnd) && (wordLength < THAI_ROOT_COMBINE_THRESHOLD))
            {
                if ((words[wordsFound % THAI_LOOKAHEAD].candidates(fIter, this.fDictionary, rangeEnd) <= 0)
                        && ((wordLength == 0)
                            || (words[wordsFound % THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)))
                {
                    // Scan forward until a plausible word start that the dictionary
                    // recognises is found, or the range is exhausted.
                    int remaining = rangeEnd - (current + wordLength);
                    int pc = fIter.current();
                    int chars = 0;
                    for (;;) {
                        fIter.next();
                        int uc = fIter.current();
                        chars++;
                        remaining--;
                        if (remaining <= 0) {
                            break;
                        }
                        if ((fEndWordSet.contains(pc)) && (fBeginWordSet.contains(uc)))
                        {
                            // Probe with the next slot, then restore the scan position
                            // because candidates() moves the iterator.
                            int candidate = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(fIter, this.fDictionary, rangeEnd);
                            fIter.setIndex(current + wordLength + chars);
                            if (candidate > 0) {
                                break;
                            }
                        }
                        pc = uc;
                    }

                    // The skipped text counts as a word of its own if nothing preceded it.
                    if (wordLength <= 0) {
                        wordsFound++;
                    }

                    wordLength += chars;
                }
                else {
                    // A dictionary word follows: undo the movement made by candidates().
                    fIter.setIndex(current + wordLength);
                }
            }

            // Attach any following mark characters to the word.
            int currPos;
            while (((currPos = fIter.getIndex()) < rangeEnd) && (fMarkSet.contains(fIter.current()))) {
                fIter.next();
                wordLength += fIter.getIndex() - currPos;
            }

            // Look for a suffix character when no dictionary word follows.
            if ((fIter.getIndex() < rangeEnd) && (wordLength > 0)) {
                int uc;
                if ((words[wordsFound % THAI_LOOKAHEAD].candidates(fIter, this.fDictionary, rangeEnd) <= 0)
                        && (fSuffixSet.contains(uc = fIter.current())))
                {
                    if (uc == THAI_PAIYANNOI) {
                        if (!fSuffixSet.contains(fIter.previous()))
                        {
                            // Step back over the previous character, then past PAIYANNOI.
                            fIter.next();
                            fIter.next();
                            wordLength++;
                            uc = fIter.current();
                        }
                        else {
                            // Restore the position changed by previous().
                            fIter.next();
                        }
                    }
                    if (uc == THAI_MAIYAMOK) {
                        if (fIter.previous() != THAI_MAIYAMOK)
                        {
                            // Step back over the previous character, then past MAIYAMOK.
                            fIter.next();
                            fIter.next();
                            wordLength++;
                        }
                        else {
                            // Restore the position changed by previous().
                            fIter.next();
                        }
                    }
                } else {
                    fIter.setIndex(current + wordLength);
                }
            }

            if (wordLength > 0) {
                foundBreaks.push(Integer.valueOf(current + wordLength));
            }
        }

        // rangeEnd is appended explicitly below, so drop a trailing break that
        // already sits at (or past) it. The emptiness check is defensive.
        if (!foundBreaks.isEmpty() && foundBreaks.peek().intValue() >= rangeEnd) {
            foundBreaks.pop();
            wordsFound--;
        }

        // Cache layout: [rangeStart, break_0, ..., break_{n-1}, rangeEnd].
        int breakCount = foundBreaks.size();
        this.cachedBreakPositions = new int[breakCount + 2];
        this.cachedBreakPositions[0] = rangeStart;
        for (int i = 0; i < breakCount; i++) {
            this.cachedBreakPositions[i + 1] = foundBreaks.elementAt(i).intValue();
        }
        this.cachedBreakPositions[breakCount + 1] = rangeEnd;
        this.positionInCache = 0;

        return wordsFound;
    }
}
/* Location: C:\Users\Ethan\Desktop\FontZip\FontTool\sfnttool.jar!\com\ibm\icu\text\ThaiBreakIterator.class
* Java compiler version: 5 (49.0)
* JD-Core Version: 0.7.1
*/