IndicScriptProcessor.java example

Explorer
fop-master
- fop-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.complexscripts.scripts;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.fop.complexscripts.fonts.GlyphTable;
import org.apache.fop.complexscripts.util.CharAssociation;
import org.apache.fop.complexscripts.util.CharScript;
import org.apache.fop.complexscripts.util.GlyphContextTester;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;

// CSOFF: LineLengthCheck

/**
 * <p>The <code>IndicScriptProcessor</code> class implements a script processor for
 * performing glyph substitution and positioning operations on content associated with the Indic script.</p>
 *
 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
 */
public class IndicScriptProcessor extends DefaultScriptProcessor {

    /** logging instance */
    private static final Log log = LogFactory.getLog(IndicScriptProcessor.class);

    /** required features to use for substitutions */
    private static final String[] GSUB_REQ_FEATURES =
    {
        "abvf",                                                 // above base forms
        "abvs",                                                 // above base substitutions
        "akhn",                                                 // akhand
        "blwf",                                                 // below base forms
        "blws",                                                 // below base substitutions
        "ccmp",                                                 // glyph composition/decomposition
        "cjct",                                                 // conjunct forms
        "clig",                                                 // contextual ligatures
        "half",                                                 // half forms
        "haln",                                                 // halant forms
        "locl",                                                 // localized forms
        "nukt",                                                 // nukta forms
        "pref",                                                 // pre-base forms
        "pres",                                                 // pre-base substitutions
        "pstf",                                                 // post-base forms
        "psts",                                                 // post-base substitutions
        "rkrf",                                                 // rakar forms
        "rphf",                                                 // reph form
        "vatu"                                                  // vattu variants
    };

    /** optional features to use for substitutions */
    private static final String[] GSUB_OPT_FEATURES =
    {
        "afrc",                                                 // alternative fractions
        "calt",                                                 // contextual alternatives
        "dlig"                                                  // discretionary ligatures
    };

    /** required features to use for positioning */
    private static final String[] GPOS_REQ_FEATURES =
    {
        "abvm",                                                 // above base marks
        "blwm",                                                 // below base marks
        "dist",                                                 // distance (adjustment)
        "kern"                                                  // kerning
    };

    /** required features to use for positioning */
    private static final String[] GPOS_OPT_FEATURES =
    {
    };

    private static class SubstitutionScriptContextTester implements ScriptContextTester {
        private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
        public GlyphContextTester getTester(String feature) {
            return testerMap.get(feature);
        }
    }

    private static class PositioningScriptContextTester implements ScriptContextTester {
        private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
        public GlyphContextTester getTester(String feature) {
            return testerMap.get(feature);
        }
    }

    /**
     * Make script specific flavor of Indic script processor.
     * @param script tag
     * @return script processor instance
     */
    public static ScriptProcessor makeProcessor(String script) {
        switch (CharScript.scriptCodeFromTag(script)) {
        case CharScript.SCRIPT_DEVANAGARI:
        case CharScript.SCRIPT_DEVANAGARI_2:
            return new DevanagariScriptProcessor(script);
        case CharScript.SCRIPT_GUJARATI:
        case CharScript.SCRIPT_GUJARATI_2:
            return new GujaratiScriptProcessor(script);
        case CharScript.SCRIPT_GURMUKHI:
        case CharScript.SCRIPT_GURMUKHI_2:
            return new GurmukhiScriptProcessor(script);
        case CharScript.SCRIPT_TAMIL:
        case CharScript.SCRIPT_TAMIL_2:
            return new TamilScriptProcessor(script);
        // [TBD] implement other script processors
        default:
            return new IndicScriptProcessor(script);
        }
    }

    private final ScriptContextTester subContextTester;
    private final ScriptContextTester posContextTester;

    IndicScriptProcessor(String script) {
        super(script);
        this.subContextTester = new SubstitutionScriptContextTester();
        this.posContextTester = new PositioningScriptContextTester();
    }

    /** {@inheritDoc} */
    public String[] getSubstitutionFeatures() {
        return GSUB_REQ_FEATURES;
    }

    /** {@inheritDoc} */
    public String[] getOptionalSubstitutionFeatures() {
        return GSUB_OPT_FEATURES;
    }

    /** {@inheritDoc} */
    public ScriptContextTester getSubstitutionContextTester() {
        return subContextTester;
    }

    /** {@inheritDoc} */
    public String[] getPositioningFeatures() {
        return GPOS_REQ_FEATURES;
    }

    /** {@inheritDoc} */
    public String[] getOptionalPositioningFeatures() {
        return GPOS_OPT_FEATURES;
    }

    /** {@inheritDoc} */
    public ScriptContextTester getPositioningContextTester() {
        return posContextTester;
    }

    /** {@inheritDoc} */
    @Override
    public GlyphSequence substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct) {
        assert usa != null;
        // 1. syllabize
        GlyphSequence[] sa = syllabize(gs, script, language);
        // 2. process each syllable
        for (int i = 0, n = sa.length; i < n; i++) {
            GlyphSequence s = sa [ i ];
            // apply basic shaping subs
            for (GlyphTable.UseSpec us : usa) {
                if (isBasicShapingUse(us)) {
                    s.setPredications(true);
                    s = us.substitute(s, script, language, sct);
                }
            }
            // reorder pre-base matra
            s = reorderPreBaseMatra(s);
            // reorder reph
            s = reorderReph(s);
            // apply presentation subs
            for (GlyphTable.UseSpec us : usa) {
                if (isPresentationUse(us)) {
                    s.setPredications(true);
                    s = us.substitute(s, script, language, sct);
                }
            }
            // record result
            sa [ i ] = s;
        }
        // 3. return reassembled substituted syllables
        return unsyllabize(gs, sa);
    }

    /**
     * Get script specific syllabizer class.
     * @return a syllabizer class object or null
     */
    protected Class<? extends Syllabizer> getSyllabizerClass() {
        return null;
    }

    private GlyphSequence[] syllabize(GlyphSequence gs, String script, String language) {
        return Syllabizer.getSyllabizer(script, language, getSyllabizerClass()).syllabize(gs);
    }

    private GlyphSequence unsyllabize(GlyphSequence gs, GlyphSequence[] sa) {
        return GlyphSequence.join(gs, sa);
    }

    private static Set<String> basicShapingFeatures;
    private static final String[] BASIC_SHAPING_FEATURE_STRINGS = {
        "abvf",
        "akhn",
        "blwf",
        "cjct",
        "half",
        "locl",
        "nukt",
        "pref",
        "pstf",
        "rkrf",
        "rphf",
        "vatu",
    };
    static {
        basicShapingFeatures = new HashSet<String>();
        Collections.addAll(basicShapingFeatures, BASIC_SHAPING_FEATURE_STRINGS);
    }
    private boolean isBasicShapingUse(GlyphTable.UseSpec us) {
        assert us != null;
        if (basicShapingFeatures != null) {
            return basicShapingFeatures.contains(us.getFeature());
        } else {
            return false;
        }
    }

    private static  Set<String> presentationFeatures;
    private static final String[] PRESENTATION_FEATURE_STRINGS = {
        "abvs",
        "blws",
        "calt",
        "haln",
        "pres",
        "psts",
    };
    static {
        presentationFeatures = new HashSet<String>();
        Collections.addAll(presentationFeatures, PRESENTATION_FEATURE_STRINGS);
    }
    private boolean isPresentationUse(GlyphTable.UseSpec us) {
        assert us != null;
        if (presentationFeatures != null) {
            return presentationFeatures.contains(us.getFeature());
        } else {
            return false;
        }
    }

    private GlyphSequence reorderPreBaseMatra(GlyphSequence gs) {
        int source;
        if ((source = findPreBaseMatra(gs)) >= 0) {
            int target;
            if ((target = findPreBaseMatraTarget(gs, source)) >= 0) {
                if (target != source) {
                    gs = reorder(gs, source, target);
                }
            }
        }
        return gs;
    }

    /**
     * Find pre-base matra in sequence.
     * @param gs input sequence
     * @return index of pre-base matra or -1 if not found
     */
    protected int findPreBaseMatra(GlyphSequence gs) {
        return -1;
    }

    /**
     * Find pre-base matra target in sequence.
     * @param gs input sequence
     * @param source index of pre-base matra
     * @return index of pre-base matra target or -1
     */
    protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
        return -1;
    }

    private GlyphSequence reorderReph(GlyphSequence gs) {
        int source;
        if ((source = findReph(gs)) >= 0) {
            int target;
            if ((target = findRephTarget(gs, source)) >= 0) {
                if (target != source) {
                    gs = reorder(gs, source, target);
                }
            }
        }
        return gs;
    }

    /**
     * Find reph in sequence.
     * @param gs input sequence
     * @return index of reph or -1 if not found
     */
    protected int findReph(GlyphSequence gs) {
        return -1;
    }

    /**
     * Find reph target in sequence.
     * @param gs input sequence
     * @param source index of reph
     * @return index of reph target or -1
     */
    protected int findRephTarget(GlyphSequence gs, int source) {
        return -1;
    }

    private GlyphSequence reorder(GlyphSequence gs, int source, int target) {
        return GlyphSequence.reorder(gs, source, 1, target);
    }

    /** {@inheritDoc} */
    @Override
    public boolean position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct) {
        boolean adjusted = super.position(gs, script, language, fontSize, usa, widths, adjustments, sct);
        return adjusted;
    }

    /** Abstract syllabizer. */
    protected abstract static class Syllabizer implements Comparable {
        private String script;
        private String language;
        Syllabizer(String script, String language) {
            this.script = script;
            this.language = language;
        }
        /**
         * Subdivide glyph sequence GS into syllabic segments each represented by a distinct
         * output glyph sequence.
         * @param gs input glyph sequence
         * @return segmented syllabic glyph sequences
         */
        abstract GlyphSequence[] syllabize(GlyphSequence gs);
        /** {@inheritDoc} */
        public int hashCode() {
            int hc = 0;
            hc =  7 * hc + (hc ^ script.hashCode());
            hc = 11 * hc + (hc ^ language.hashCode());
            return hc;
        }
        /** {@inheritDoc} */
        public boolean equals(Object o) {
            if (o instanceof Syllabizer) {
                Syllabizer s = (Syllabizer) o;
                if (!s.script.equals(script)) {
                    return false;
                } else {
                    return s.language.equals(language);
                }
            } else {
                return false;
            }
        }
        /** {@inheritDoc} */
        public int compareTo(Object o) {
            int d;
            if (o instanceof Syllabizer) {
                Syllabizer s = (Syllabizer) o;
                if ((d = script.compareTo(s.script)) == 0) {
                    d = language.compareTo(s.language);
                }
            } else {
                d = -1;
            }
            return d;
        }
        private static Map<String, Syllabizer> syllabizers = new HashMap<String, Syllabizer>();
        static Syllabizer getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
            String sid = makeSyllabizerId(script, language);
            Syllabizer s = syllabizers.get(sid);
            if (s == null) {
                if ((syllabizerClass == null) || ((s = makeSyllabizer(script, language, syllabizerClass)) == null)) {
                    log.warn("No syllabizer available for script '" + script + "', language '" + language + "', using default Indic syllabizer.");
                    s = new DefaultSyllabizer(script, language);
                }
                syllabizers.put(sid, s);
            }
            return s;
        }
        static String makeSyllabizerId(String script, String language) {
            return script + ":" + language;
        }
        static Syllabizer makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
            Syllabizer s;
            try {
                Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor(new Class[] { String.class, String.class });
                s = (Syllabizer) cf.newInstance(script, language);
            } catch (NoSuchMethodException e) {
                s = null;
            } catch (InstantiationException e) {
                s = null;
            } catch (IllegalAccessException e) {
                s = null;
            } catch (InvocationTargetException e) {
                s = null;
            }
            return s;
        }
    }

    /** Default syllabizer. */
    protected static class DefaultSyllabizer extends Syllabizer {
        DefaultSyllabizer(String script, String language) {
            super(script, language);
        }
        /** {@inheritDoc} */
        @Override
        GlyphSequence[] syllabize(GlyphSequence gs) {
            int[] ca = gs.getCharacterArray(false);
            int   nc = gs.getCharacterCount();
            if (nc == 0) {
                return new GlyphSequence[] { gs };
            } else {
                return segmentize(gs, segmentize(ca, nc));
            }
        }
        /**
         * Construct array of segements from original character array (associated with original glyph sequence)
         * @param ca input character sequence
         * @param nc number of characters in sequence
         * @return array of syllable segments
         */
        protected Segment[] segmentize(int[] ca, int nc) {
            Vector<Segment> sv = new Vector<Segment>(nc);
            for (int s = 0, e = nc; s < e; ) {
                int i;
                if ((i = findStartOfSyllable(ca, s, e)) < e) {
                    if (s < i) {
                        // from s to i is non-syllable segment
                        sv.add(new Segment(s, i, Segment.OTHER));
                    }
                    s = i; // move s to start of syllable
                } else {
                    if (s < e) {
                        // from s to e is non-syllable segment
                        sv.add(new Segment(s, e, Segment.OTHER));
                    }
                    s = e; // move s to end of input sequence
                }
                if ((i = findEndOfSyllable(ca, s, e)) > s) {
                    if (s < i) {
                        // from s to i is syllable segment
                        sv.add(new Segment(s, i, Segment.SYLLABLE));
                    }
                    s = i; // move s to end of syllable
                } else {
                    if (s < e) {
                        // from s to e is non-syllable segment
                        sv.add(new Segment(s, e, Segment.OTHER));
                    }
                    s = e; // move s to end of input sequence
                }
            }
            return sv.toArray(new Segment [ sv.size() ]);
        }
        /**
         * Construct array of glyph sequences from original glyph sequence and segment array.
         * @param gs original input glyph sequence
         * @param sa segment array
         * @return array of glyph sequences each belonging to an (ordered) segment in SA
         */
        protected GlyphSequence[] segmentize(GlyphSequence gs, Segment[] sa) {
            int   ng = gs.getGlyphCount();
            int[] ga = gs.getGlyphArray(false);
            CharAssociation[] aa = gs.getAssociations(0, -1);
            Vector<GlyphSequence> nsv = new Vector<GlyphSequence>();
            for (Segment s : sa) {
                Vector<Integer> ngv = new Vector<Integer>(ng);
                Vector<CharAssociation> nav = new Vector<CharAssociation>(ng);
                for (int j = 0; j < ng; j++) {
                    CharAssociation ca = aa[j];
                    if (ca.contained(s.getOffset(), s.getCount())) {
                        ngv.add(ga[j]);
                        nav.add(ca);
                    }
                }
                if (ngv.size() > 0) {
                    nsv.add(new GlyphSequence(gs, null, toIntArray(ngv), null, null, nav.toArray(new CharAssociation[nav.size()]), null));
                }
            }
            if (nsv.size() > 0) {
                return nsv.toArray(new GlyphSequence [ nsv.size() ]);
            } else {
                return new GlyphSequence[] { gs };
            }
        }
        /**
         * Find start of syllable in character array, starting at S, ending at E.
         * @param ca character array
         * @param s start index
         * @param e end index
         * @return index of start or E if no start found
         */
        protected int findStartOfSyllable(int[] ca, int s, int e) {
            return e;
        }
        /**
         * Find end of syllable in character array, starting at S, ending at E.
         * @param ca character array
         * @param s start index
         * @param e end index
         * @return index of start or S if no end found
         */
        protected int findEndOfSyllable(int[] ca, int s, int e) {
            return s;
        }
        private static int[] toIntArray(Vector<Integer> iv) {
            int ni = iv.size();
            int[] ia = new int [ iv.size() ];
            for (int i = 0, n = ni; i < n; i++) {
                ia [ i ] = (int) iv.get(i);
            }
            return ia;
        }
    }

    /** Syllabic segment. */
    protected static class Segment {

        static final int OTHER = 0;            // other (non-syllable) characters
        static final int SYLLABLE = 1;         // (orthographic) syllable

        private int start;
        private int end;
        private int type;

        Segment(int start, int end, int type) {
            this.start = start;
            this.end = end;
            this.type = type;
        }

        int getStart() {
            return start;
        }

        int getEnd() {
            return end;
        }

        int getOffset() {
            return start;
        }

        int getCount() {
            return end - start;
        }

        int getType() {
            return type;
        }
    }
}