/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.complexscripts.scripts; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fop.complexscripts.util.CharAssociation; import org.apache.fop.complexscripts.util.GlyphSequence; // CSOFF: LineLengthCheck /** * <p>The <code>GujaratiScriptProcessor</code> class implements a script processor for * performing glyph substitution and positioning operations on content associated with the Gujarati script.</p> * * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p> */ public class GujaratiScriptProcessor extends IndicScriptProcessor { /** logging instance */ private static final Log log = LogFactory.getLog(GujaratiScriptProcessor.class); GujaratiScriptProcessor(String script) { super(script); } @Override protected Class<? extends GujaratiSyllabizer> getSyllabizerClass() { return GujaratiSyllabizer.class; } @Override // find rightmost pre-base matra protected int findPreBaseMatra(GlyphSequence gs) { int ng = gs.getGlyphCount(); int lk = -1; for (int i = ng; i > 0; i--) { int k = i - 1; if (containsPreBaseMatra(gs, k)) { lk = k; break; } } return lk; } @Override // find leftmost pre-base matra target, starting from source protected int findPreBaseMatraTarget(GlyphSequence gs, int source) { int ng = gs.getGlyphCount(); int lk = -1; for (int i = (source < ng) ? source : ng; i > 0; i--) { int k = i - 1; if (containsConsonant(gs, k)) { if (containsHalfConsonant(gs, k)) { lk = k; } else if (lk == -1) { lk = k; } else { break; } } } return lk; } private static boolean containsPreBaseMatra(GlyphSequence gs, int k) { CharAssociation a = gs.getAssociation(k); int[] ca = gs.getCharacterArray(false); for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { if (isPreM(ca [ i ])) { return true; } } return false; } private static boolean containsConsonant(GlyphSequence gs, int k) { CharAssociation a = gs.getAssociation(k); int[] ca = gs.getCharacterArray(false); for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { if (isC(ca [ i ])) { return true; } } return false; } private static boolean containsHalfConsonant(GlyphSequence gs, int k) { Boolean half = (Boolean) gs.getAssociation(k) .getPredication("half"); return (half != null) ? half : false; } @Override protected int findReph(GlyphSequence gs) { int ng = gs.getGlyphCount(); int li = -1; for (int i = 0; i < ng; i++) { if (containsReph(gs, i)) { li = i; break; } } return li; } @Override protected int findRephTarget(GlyphSequence gs, int source) { int ng = gs.getGlyphCount(); int c1 = -1; int c2 = -1; // first candidate target is after first non-half consonant for (int i = 0; i < ng; i++) { if ((i != source) && containsConsonant(gs, i)) { if (!containsHalfConsonant(gs, i)) { c1 = i + 1; break; } } } // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark for (int i = (c1 >= 0) ? c1 : 0; i < ng; i++) { if (containsMatra(gs, i) && !containsPreBaseMatra(gs, i)) { c2 = i + 1; } else if (containsOtherMark(gs, i)) { c2 = i; break; } } if (c2 >= 0) { return c2; } else if (c1 >= 0) { return c1; } else { return source; } } private static boolean containsReph(GlyphSequence gs, int k) { Boolean rphf = (Boolean) gs.getAssociation(k) .getPredication("rphf"); return (rphf != null) ? rphf : false; } private static boolean containsMatra(GlyphSequence gs, int k) { CharAssociation a = gs.getAssociation(k); int[] ca = gs.getCharacterArray(false); for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { if (isM(ca [ i ])) { return true; } } return false; } private static boolean containsOtherMark(GlyphSequence gs, int k) { CharAssociation a = gs.getAssociation(k); int[] ca = gs.getCharacterArray(false); for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { switch (typeOf(ca [ i ])) { case C_T: // tone (e.g., udatta, anudatta) case C_A: // accent (e.g., acute, grave) case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) return true; default: break; } } return false; } private static class GujaratiSyllabizer extends DefaultSyllabizer { GujaratiSyllabizer(String script, String language) { super(script, language); } @Override // | C ... protected int findStartOfSyllable(int[] ca, int s, int e) { if ((s < 0) || (s >= e)) { return -1; } else { while (s < e) { int c = ca [ s ]; if (isC(c)) { break; } else { s++; } } return s; } } @Override // D* L? | ... protected int findEndOfSyllable(int[] ca, int s, int e) { if ((s < 0) || (s >= e)) { return -1; } else { int nd = 0; int nl = 0; int i; // consume dead consonants while ((i = isDeadConsonant(ca, s, e)) > s) { s = i; nd++; } // consume zero or one live consonant if ((i = isLiveConsonant(ca, s, e)) > s) { s = i; nl++; } return ((nd > 0) || (nl > 0)) ? s : -1; } } // D := ( C N? H )? private int isDeadConsonant(int[] ca, int s, int e) { if (s < 0) { return -1; } else { int c; int i = 0; int nc = 0; int nh = 0; do { // C if ((s + i) < e) { c = ca [ s + i ]; if (isC(c)) { i++; nc++; } else { break; } } // N? if ((s + i) < e) { c = ca [ s + 1 ]; if (isN(c)) { i++; } } // H if ((s + i) < e) { c = ca [ s + i ]; if (isH(c)) { i++; nh++; } else { break; } } } while (false); return (nc > 0) && (nh > 0) ? s + i : -1; } } // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) private int isLiveConsonant(int[] ca, int s, int e) { if (s < 0) { return -1; } else { int c; int i = 0; int nc = 0; int nv = 0; int nx = 0; do { // C if ((s + i) < e) { c = ca [ s + i ]; if (isC(c)) { i++; nc++; } else if (isV(c)) { i++; nv++; } else { break; } } // N? if ((s + i) < e) { c = ca [ s + i ]; if (isN(c)) { i++; } } // X* while ((s + i) < e) { c = ca [ s + i ]; if (isX(c)) { i++; nx++; } else { break; } } } while (false); // if no X but has H, then ignore C|I if (nx == 0) { if ((s + i) < e) { c = ca [ s + i ]; if (isH(c)) { if (nc > 0) { nc--; } else if (nv > 0) { nv--; } } } } return ((nc > 0) || (nv > 0)) ? s + i : -1; } } } // gujarati character types static final short C_U = 0; // unassigned static final short C_C = 1; // consonant static final short C_V = 2; // vowel static final short C_M = 3; // vowel sign (matra) static final short C_S = 4; // symbol or sign static final short C_T = 5; // tone mark static final short C_A = 6; // accent mark static final short C_P = 7; // punctuation static final short C_D = 8; // digit static final short C_H = 9; // halant (virama) static final short C_O = 10; // other signs static final short C_N = 0x0100; // nukta(ized) static final short C_R = 0x0200; // reph(ized) static final short C_PRE = 0x0400; // pre-base static final short C_M_TYPE = 0x00FF; // type mask static final short C_M_FLAGS = 0x7F00; // flag mask // gujarati block range static final int CCA_START = 0x0A80; // first code point mapped by cca static final int CCA_END = 0x0B00; // last code point + 1 mapped by cca // gujarati character type lookups static final short[] CCA = { C_U, // 0x0A80 // UNASSIGNED C_O, // 0x0A81 // CANDRABINDU C_O, // 0x0A82 // ANUSVARA C_O, // 0x0A83 // VISARGA C_U, // 0x0A84 // UNASSIGNED C_V, // 0x0A85 // A C_V, // 0x0A86 // AA C_V, // 0x0A87 // I C_V, // 0x0A88 // II C_V, // 0x0A89 // U C_V, // 0x0A8A // UU C_V, // 0x0A8B // VOCALIC R C_V, // 0x0A8C // VOCALIC L C_V, // 0x0A8D // CANDRA E C_U, // 0x0A8E // UNASSIGNED C_V, // 0x0A8F // E C_V, // 0x0A90 // AI C_V, // 0x0A91 // CANDRA O C_U, // 0x0A92 // UNASSIGNED C_V, // 0x0A93 // O C_V, // 0x0A94 // AU C_C, // 0x0A95 // KA C_C, // 0x0A96 // KHA C_C, // 0x0A97 // GA C_C, // 0x0A98 // GHA C_C, // 0x0A99 // NGA C_C, // 0x0A9A // CA C_C, // 0x0A9B // CHA C_C, // 0x0A9C // JA C_C, // 0x0A9D // JHA C_C, // 0x0A9E // NYA C_C, // 0x0A9F // TTA C_C, // 0x0AA0 // TTHA C_C, // 0x0AA1 // DDA C_C, // 0x0AA2 // DDHA C_C, // 0x0AA3 // NNA C_C, // 0x0AA4 // TA C_C, // 0x0AA5 // THA C_C, // 0x0AA6 // DA C_C, // 0x0AA7 // DHA C_C, // 0x0AA8 // NA C_U, // 0x0AA9 // UNASSIGNED C_C, // 0x0AAA // PA C_C, // 0x0AAB // PHA C_C, // 0x0AAC // BA C_C, // 0x0AAD // BHA C_C, // 0x0AAE // MA C_C, // 0x0AAF // YA C_C | C_R, // 0x0AB0 // RA C_U, // 0x0AB1 // UNASSIGNED C_C, // 0x0AB2 // LA C_C, // 0x0AB3 // LLA C_U, // 0x0AB4 // UNASSIGNED C_C, // 0x0AB5 // VA C_C, // 0x0AB6 // SHA C_C, // 0x0AB7 // SSA C_C, // 0x0AB8 // SA C_C, // 0x0AB9 // HA C_U, // 0x0ABA // UNASSIGNED C_U, // 0x0ABB // UNASSIGNED C_N, // 0x0ABC // NUKTA C_S, // 0x0ABD // AVAGRAHA C_M, // 0x0ABE // AA C_M | C_PRE, // 0x0ABF // I C_M, // 0x0AC0 // II C_M, // 0x0AC1 // U C_M, // 0x0AC2 // UU C_M, // 0x0AC3 // VOCALIC R C_M, // 0x0AC4 // VOCALIC RR C_M, // 0x0AC5 // CANDRA E C_U, // 0x0AC6 // UNASSIGNED C_M, // 0x0AC7 // E C_M, // 0x0AC8 // AI C_M, // 0x0AC9 // CANDRA O C_U, // 0x0ACA // UNASSIGNED C_M, // 0x0ACB // O C_M, // 0x0ACC // AU C_H, // 0x0ACD // VIRAMA (HALANT) C_U, // 0x0ACE // UNASSIGNED C_U, // 0x0ACF // UNASSIGNED C_S, // 0x0AD0 // OM C_U, // 0x0AD1 // UNASSIGNED C_U, // 0x0AD2 // UNASSIGNED C_U, // 0x0AD3 // UNASSIGNED C_U, // 0x0AD4 // UNASSIGNED C_U, // 0x0AD5 // UNASSIGNED C_U, // 0x0AD6 // UNASSIGNED C_U, // 0x0AD7 // UNASSIGNED C_U, // 0x0AD8 // UNASSIGNED C_U, // 0x0AD9 // UNASSIGNED C_U, // 0x0ADA // UNASSIGNED C_U, // 0x0ADB // UNASSIGNED C_U, // 0x0ADC // UNASSIGNED C_U, // 0x0ADD // UNASSIGNED C_U, // 0x0ADE // UNASSIGNED C_U, // 0x0ADF // UNASSIGNED C_V, // 0x0AE0 // VOCALIC RR C_V, // 0x0AE1 // VOCALIC LL C_M, // 0x0AE2 // VOCALIC L C_M, // 0x0AE3 // VOCALIC LL C_U, // 0x0AE4 // UNASSIGNED C_U, // 0x0AE5 // UNASSIGNED C_D, // 0x0AE6 // ZERO C_D, // 0x0AE7 // ONE C_D, // 0x0AE8 // TWO C_D, // 0x0AE9 // THREE C_D, // 0x0AEA // FOUR C_D, // 0x0AEB // FIVE C_D, // 0x0AEC // SIX C_D, // 0x0AED // SEVEN C_D, // 0x0AEE // EIGHT C_D, // 0x0AEF // NINE C_U, // 0x0AF0 // UNASSIGNED C_S, // 0x0AF1 // RUPEE SIGN C_U, // 0x0AF2 // UNASSIGNED C_U, // 0x0AF3 // UNASSIGNED C_U, // 0x0AF4 // UNASSIGNED C_U, // 0x0AF5 // UNASSIGNED C_U, // 0x0AF6 // UNASSIGNED C_U, // 0x0AF7 // UNASSIGNED C_U, // 0x0AF8 // UNASSIGNED C_U, // 0x0AF9 // UNASSIGNED C_U, // 0x0AFA // UNASSIGNED C_U, // 0x0AFB // UNASSIGNED C_U, // 0x0AFC // UNASSIGNED C_U, // 0x0AFD // UNASSIGNED C_U, // 0x0AFE // UNASSIGNED C_U // 0x0AFF // UNASSIGNED }; static int typeOf(int c) { if ((c >= CCA_START) && (c < CCA_END)) { return CCA [ c - CCA_START ] & C_M_TYPE; } else { return C_U; } } static boolean isType(int c, int t) { return typeOf(c) == t; } static boolean hasFlag(int c, int f) { if ((c >= CCA_START) && (c < CCA_END)) { return (CCA [ c - CCA_START ] & f) == f; } else { return false; } } static boolean isC(int c) { return isType(c, C_C); } static boolean isR(int c) { return isType(c, C_C) && hasR(c); } static boolean isV(int c) { return isType(c, C_V); } static boolean isN(int c) { return c == 0x0ABC; } static boolean isH(int c) { return c == 0x0ACD; } static boolean isM(int c) { return isType(c, C_M); } static boolean isPreM(int c) { return isType(c, C_M) && hasFlag(c, C_PRE); } static boolean isX(int c) { switch (typeOf(c)) { case C_M: // matra (combining vowel) case C_A: // accent mark case C_T: // tone mark case C_O: // other (modifying) mark return true; default: return false; } } static boolean hasR(int c) { return hasFlag(c, C_R); } static boolean hasN(int c) { return hasFlag(c, C_N); } }