// ScriptSystem.java example
//
// Explorer
// QuaquaOld-master
// - src
/*
 * @(#)ScriptSystem.java  
 *
 * Copyright (c) 2005-2010 Werner Randelshofer, Immensee, Switzerland.
 * All rights reserved.
 *
 * You may not use, copy or modify this file, except in compliance with the
 * license agreement you entered into with Werner Randelshofer.
 * For details see accompanying license terms.
 */

package ch.randelshofer.quaqua.util;

import java.awt.*;
/**
 * Constants for the script systems supported by Unicode, together with
 * lookups that map a character to its script system and a script system
 * to the character and baseline used for font measurements.
 * <p>
 * Not every constant declared here has an entry in the internal lookup
 * table. Characters of a script without a table entry are reported as
 * {@link #LATIN}, and such a script yields the default measurement
 * character {@code 'X'} and the default baseline
 * {@code Font.ROMAN_BASELINE}.
 *
 * @author  Werner Randelshofer
 * @version $Id: ScriptSystem.java 363 2010-11-21 17:41:04Z wrandelshofer $
 */
public class ScriptSystem {
    /**
     * Script system specification: one row of the lookup table.
     */
    private static class Spec {
        /** ID of the script system (one of the constants of ScriptSystem). */
        final int system;
        /**
         * Flat list of inclusive code point intervals:
         * {from0, to0, from1, to1, ...}.
         */
        final int[] ranges;
        /** Character used to measure perceived ascent and descent. */
        final char measurementChar;
        /** Baseline constant of java.awt.Font. */
        final int baseline;

        /**
         * Creates a new specification.
         *
         * @param system ID of the script system.
         * @param ranges Inclusive code point intervals as (from, to) pairs.
         * @param measurementChar Code of the measurement character; it is
         * narrowed to a char.
         * @param baseline java.awt.Font.ROMAN_BASELINE, CENTER_BASELINE or
         * HANGING_BASELINE.
         */
        Spec(int system, int[] ranges, int measurementChar, int baseline) {
            this.system = system;
            this.ranges = ranges;
            this.measurementChar = (char) measurementChar;
            this.baseline = baseline;
        }

        /**
         * Returns true if the code point lies in one of the intervals of
         * this specification.
         */
        boolean contains(int codePoint) {
            for (int i = 0; i + 1 < ranges.length; i += 2) {
                if (codePoint >= ranges[i] && codePoint <= ranges[i + 1]) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * Script systems. The ID of the script system is the lowest
     * Unicode character code of the script system.
     */
    public final static int
    // EUROPEAN ALPHABETS
    ARMENIAN = 0x530,
    COPTIC = 0x2c80,
    CYRILLIC = 0x400,
    GEORGIAN = 0x10a0,
    GREEK = 0x370,
    LATIN = 0x0,
    // AFRICAN SCRIPTS
    ETHIOPIC = 0x1200,
    // NKO: not defined yet
    TIFINAGH = 0x2d30,
    // MIDDLE EASTERN SCRIPTS
    ARABIC = 0x600,
    HEBREW = 0x590,
    SYRIAC = 0x700,
    THAANA = 0x780,
    // AMERICAN SCRIPTS
    CANADIAN_SYLLABICS = 0x1400,
    CHEROKEE = 0x13a0,
    DESERET = 0x10400,
    // OTHER SCRIPTS
    SHAVIAN = 0x10450,
    OSMANYA = 0x10480,
    // GLAGOLITIC: not defined yet
    // INDIC SCRIPTS
    BENGALI = 0x980,
    DEVANAGARI = 0x900,
    GUJARATI = 0xa80,
    GURMUKHI = 0xa00,
    KANNADA = 0xc80,
    LIMBU = 0x1900,
    MALAYALAM = 0xd00,
    ORIYA = 0xb00,
    SINHALA = 0xd80,
    // SYLOTI_NAGRI: not defined yet
    TAMIL = 0xb80,
    TELUGU = 0xc00,
    // PHILIPPINE SCRIPTS
    // BUHID, HANUNOO, TAGALOG, TAGBANWA: not defined yet
    // SOUTH EAST ASIAN SCRIPTS
    // BUGINESE, BALINESE: not defined yet
    KHMER = 0x1780,
    LAO = 0xe80,
    MYANMAR = 0x1000,
    // NEW_TAI_LUE: not defined yet
    TAI_LE = 0x1950,
    THAI = 0xe00,
    // EAST ASIAN SCRIPTS
    HAN = 0x2e80,
    BOPOMOFO = 0x3100,
    HIRAGANA = 0x3040,
    KATAKANA = 0x30a0,
    HANGUL = 0x1100,
    YI = 0xa000,
    // CENTRAL ASIAN
    // KHAROSHTHI: not defined yet
    MONGOLIAN = 0x1800,
    // PHAG_SPA: not defined yet
    TIBETAN = 0xf00,
    // ANCIENT SCRIPTS
    // ANCIENT_GREEK, CUNEIFORM, OLD_PERSIAN, UGARITIC, LINEARB,
    // AEGEANNUMBERS, COUNTING_ROD_NUMBERS, CYPRIOT_SYLLABARY, GOTHIC,
    // OLDITALIC, OGHAM: not defined yet
    RUNIC = 0x16a0;
    // PHOENICIAN: not defined yet

    /**
     * Lookup table for determining the script system of a given
     * character, and the measurement character and baseline of a given
     * script system.
     * <p>
     * Each entry holds the ID of the script system, the inclusive Unicode
     * code intervals (from, to) that contain characters of the script
     * system, the most useful character of the script system for
     * determining perceived ascent and perceived descent of a font, and
     * the baseline of the script system.
     * <p>
     * The table is searched front to back and the first matching entry
     * wins.
     */
    private final static Spec[] systems = {
        // European Alphabets
        new Spec(ARMENIAN, new int[] {ARMENIAN, 0x058f}, 0x552, Font.ROMAN_BASELINE), // ARMENIAN CAPITAL LETTER YIWN
        new Spec(COPTIC, new int[] {COPTIC, 0x2cff}, 0x2ca0, Font.ROMAN_BASELINE), // COPTIC CAPITAL LETTER PI
        new Spec(CYRILLIC, new int[] {CYRILLIC, 0x52f}, 0x41d, Font.ROMAN_BASELINE), // CYRILLIC CAPITAL LETTER EN
        new Spec(GEORGIAN, new int[] {GEORGIAN, 0x10ff}, 0x10a4, Font.ROMAN_BASELINE), // GEORGIAN CAPITAL LETTER EN
        new Spec(GREEK, new int[] {GREEK, 0x3f0, 0x1f00, 0x1fff}, 0x39e, Font.ROMAN_BASELINE), // GREEK CAPITAL LETTER XI
        new Spec(LATIN, new int[] {LATIN, 0x2af, 0x1d00, 0x1d7f, 0x1e00, 0x1eff, 0xfb00, 0xfb4f}, 0x49, Font.ROMAN_BASELINE), // LATIN CAPITAL LETTER I
        // African Scripts
        new Spec(ETHIOPIC, new int[] {ETHIOPIC, 0x139f, 0x2d80, 0x2ddf}, 0x1260, Font.ROMAN_BASELINE), // ETHIOPIC SYLLABLE BA
        // No entry yet: NKo
        new Spec(TIFINAGH, new int[] {TIFINAGH, 0x2d7f}, 0x2d4a, Font.ROMAN_BASELINE), // TIFINAGH LETTER YAZH
        // Middle Eastern Scripts
        new Spec(ARABIC, new int[] {ARABIC, 0x6ff, 0xfb50, 0xfdff, 0xfe70, 0xfeff}, 0x6a1, Font.ROMAN_BASELINE), // ARABIC LETTER DOTLESS FEH
        new Spec(HEBREW, new int[] {HEBREW, 0x5ff}, 0x5d3, Font.ROMAN_BASELINE), // HEBREW LETTER DALET
        new Spec(SYRIAC, new int[] {SYRIAC, 0x74f}, 0x716, Font.ROMAN_BASELINE), // SYRIAC LETTER DOTLESS DALATH RISH
        new Spec(THAANA, new int[] {THAANA, 0x7bf}, 0x782, Font.ROMAN_BASELINE), // THAANA LETTER NOONU
        // American Scripts
        // No entry yet: Canadian Syllabics, Cherokee, Deseret
        // Other Scripts
        // No entry yet: Shavian, Osmanya, Glagolitic
        // Indic Scripts
        // No entry yet: Bengali
        new Spec(DEVANAGARI, new int[] {DEVANAGARI, 0x97f}, 0x917, Font.ROMAN_BASELINE), // DEVANAGARI LETTER GA
        new Spec(GUJARATI, new int[] {GUJARATI, 0xaff}, 0xabe, Font.ROMAN_BASELINE), // GUJARATI VOWEL SIGN AA
        new Spec(GURMUKHI, new int[] {GURMUKHI, 0xa7f}, 0xa38, Font.ROMAN_BASELINE), // GURMUKHI LETTER SA
        // No entry yet: Kannada, Limbu, Malayalam, Oriya, Sinhala, Syloti Nagri
        // The interval ends at 0xbff so that it contains the measurement
        // character 0xbaa; it formerly ended at 0xb8f.
        new Spec(TAMIL, new int[] {TAMIL, 0xbff}, 0xbaa, Font.ROMAN_BASELINE), // TAMIL LETTER PA
        // No entry yet: Telugu
        // Philippine Scripts
        // No entry yet: Buhid, Hanunoo, Tagalog, Tagbanwa
        // South East Asian Scripts
        // No entry yet: Buginese, Balinese, Khmer, Lao, Myanmar, New Tai Lue, Tai Le
        new Spec(THAI, new int[] {THAI, 0xe70}, 0xe1a, Font.ROMAN_BASELINE), // THAI CHARACTER BO BAIMAI
        // East Asian Scripts
        // The first interval ends at 0x2fdf so that it contains the
        // measurement character 0x2f01; it formerly ended at 0xfff, which
        // is below its start and therefore never matched.
        new Spec(HAN, new int[] {HAN, 0x2fdf, 0x3190, 0x319f, 0x4e00, 0x9fff, 0xf900, 0xfaff, 0x3400, 0x4dbf, 0x20000, 0x2a6df, 0x2f800, 0x2fa10}, 0x2f01, Font.ROMAN_BASELINE), // KANGXI RADICAL LINE
        new Spec(BOPOMOFO, new int[] {BOPOMOFO, 0x312f, 0x31a0, 0x31bf}, 0x3107, Font.ROMAN_BASELINE), // BOPOMOFO LETTER M
        new Spec(HIRAGANA, new int[] {HIRAGANA, 0x309f}, 0x305b, Font.ROMAN_BASELINE), // HIRAGANA LETTER SE
        new Spec(KATAKANA, new int[] {KATAKANA, 0x30ff, 0xff00, 0xffe0, 0x31f0, 0x31ff}, 0x30f1, Font.ROMAN_BASELINE), // KATAKANA LETTER WE
        new Spec(HANGUL, new int[] {HANGUL, 0x11f0}, 0x1100, Font.ROMAN_BASELINE), // HANGUL CHOSEONG KIYEOK
        new Spec(YI, new int[] {YI, 0xa4cf}, 0xa490, Font.ROMAN_BASELINE), // YI RADICAL QOT
        // Central Asian
        // No entry yet: Kharoshthi
        new Spec(MONGOLIAN, new int[] {MONGOLIAN, 0x18a0}, 0x1882, Font.ROMAN_BASELINE), // MONGOLIAN LETTER ALI GALI DAMARU
        // No entry yet: Phags-pa
        new Spec(TIBETAN, new int[] {TIBETAN, 0xfff}, 0xf46, Font.ROMAN_BASELINE), // TIBETAN LETTER CHA
        // Ancient Scripts
        // No entry yet: Ancient Greek, Cuneiform, Old Persian, Ugaritic,
        // Linear B, Aegean Numbers, Counting Rod Numbers, Cypriot Syllabary,
        // Gothic, Old Italic, Ogham
        new Spec(RUNIC, new int[] {RUNIC, 0x16ff}, 0x16c1, Font.ROMAN_BASELINE), // RUNIC LETTER ISAZ IS ISS I
        // No entry yet: Phoenician
    };

    /**
     * Prevent instance creation
     */
    private ScriptSystem() {
    }

    /**
     * Returns the script system of the specified char.
     * If the script system can not be determined, LATIN is
     * returned.
     *
     * @param ch The character to classify.
     * @return The ID of the script system, or LATIN if no table entry
     * contains the character.
     */
    public static int getScriptSystemOf(char ch) {
        return getScriptSystemOf((int) ch);
    }

    /**
     * Returns the script system of the specified Unicode code point.
     * Unlike the char variant, this method can also be used with code
     * points above 0xffff.
     * If the script system can not be determined, LATIN is
     * returned.
     *
     * @param codePoint The code point to classify.
     * @return The ID of the script system, or LATIN if no table entry
     * contains the code point.
     */
    public static int getScriptSystemOf(int codePoint) {
        // FIXME This takes linear time in the size of the table.
        for (int i = 0; i < systems.length; i++) {
            if (systems[i].contains(codePoint)) {
                return systems[i].system;
            }
        }
        return LATIN;
    }

    /**
     * Returns the best character of the specified script system
     * for measuring perceived ascent and perceived descent of the
     * script.
     *
     * @param system The ID of the script system.
     * @return The measurement character, or 'X' if the script system has
     * no table entry.
     */
    public static char getMeasurementChar(int system) {
        Spec spec = findSpec(system);
        return (spec == null) ? 'X' : spec.measurementChar;
    }

    /**
     * Returns the baseline of the specified script system.
     * java.awt.Font.ROMAN_BASELINE, CENTER_BASELINE or HANGING_BASELINE
     *
     * @param system The ID of the script system.
     * @return The baseline, or Font.ROMAN_BASELINE if the script system
     * has no table entry.
     */
    public static int getBaseline(int system) {
        Spec spec = findSpec(system);
        return (spec == null) ? Font.ROMAN_BASELINE : spec.baseline;
    }

    /**
     * Returns the table entry of the specified script system, or null if
     * there is none.
     */
    private static Spec findSpec(int system) {
        // FIXME This takes linear time. Instead of this
        // algorithm, we should use a HashMap.
        for (int i = 0; i < systems.length; i++) {
            if (systems[i].system == system) {
                return systems[i];
            }
        }
        return null;
    }
}