// Collator.java example
//
// Explorer
// android_libcore-master
/* 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// BEGIN android-note
// The icu implementation used was changed from icu4j to icu4jni.
// END android-note

package java.text;

import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Comparator;
import java.util.Locale;
import java.util.Vector;

/**
 * Performs locale-sensitive string comparison. A concrete subclass,
 * {@link RuleBasedCollator}, allows customization of the collation ordering by
 * the use of rule sets.
 * <p>
 * Following the <a href="http://www.unicode.org">Unicode Consortium</a>'s
 * specifications for the <a
 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
 * Algorithm (UCA)</a>, there are 4 different levels of strength used in
 * comparisons:
 * <ul>
 * <li>PRIMARY strength: Typically, this is used to denote differences between
 * base characters (for example, "a" &lt; "b"). It is the strongest difference.
 * For example, dictionaries are divided into different sections by base
 * character.
 * <li>SECONDARY strength: Accents in the characters are considered secondary
 * differences (for example, "as" &lt; "às" &lt; "at"). Other differences
 * between letters can also be considered secondary differences, depending on
 * the language. A secondary difference is ignored when there is a primary
 * difference anywhere in the strings.
 * <li>TERTIARY strength: Upper and lower case differences in characters are
 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
 * "aò"). In addition, a variant of a letter differs from the base form
 * on the tertiary strength (such as "A" and "Ⓐ"). Another example is the
 * difference between large and small Kana. A tertiary difference is ignored
 * when there is a primary or secondary difference anywhere in the strings.
 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL
 * strength is used as a tiebreaker. The Unicode code point values of the NFD
 * form of each string are compared, but only when no difference was found at
 * the other strengths. For example, Hebrew cantillation marks are only
 * distinguished at this strength. This strength should be used sparingly,
 * because strings that differ only in their code point values are extremely
 * rare. Using this strength substantially decreases the performance of both
 * the comparison and the collation key generation APIs, and it also increases
 * the size of the collation key.
 * </ul>
 * <p>
 * This {@code Collator} deals only with two decomposition modes, the canonical
 * decomposition mode and one that does not use any decomposition. The
 * compatibility decomposition mode
 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
 * canonical decomposition mode is set, {@code Collator} handles un-normalized
 * text properly, producing the same results as if the text were normalized in
 * NFD. If canonical decomposition is turned off, it is the user's
 * responsibility to ensure that all text is already in the appropriate form
 * before performing a comparison or before getting a {@link CollationKey}.
 * <p>
 * <em>Examples:</em>
 * <blockquote>
 *
 * <pre>
 * // Get the Collator for US English and set its strength to PRIMARY
 * Collator usCollator = Collator.getInstance(Locale.US);
 * usCollator.setStrength(Collator.PRIMARY);
 * if (usCollator.compare("abc", "ABC") == 0) {
 *     System.out.println("Strings are equivalent");
 * }
 * </pre>
 *
 * </blockquote>
 * <p>
 * The following example shows how to compare two strings using the collator for
 * the default locale.
 * <blockquote>
 *
 * <pre>
 * // Compare two strings in the default locale
 * Collator myCollator = Collator.getInstance();
 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
 *     System.out.println("\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition");
 *     myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 *     if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
 *         System.out.println("Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition");
 *     } else {
 *         System.out.println("\u00e0\u0325 is equal to a\u0325\u0300 with decomposition");
 *     }
 * } else {
 *     System.out.println("Error: \u00e0\u0325 should be not equal to a\u0325\u0300 without decomposition");
 * }
 * </pre>
 *
 * </blockquote>
 *
 * @see RuleBasedCollator
 * @see CollationKey
 */
public abstract class Collator implements Comparator<Object>, Cloneable {

    // Package-private comparison result constants (0, 1 and -1). They are not
    // referenced anywhere else in this class; presumably they are used by
    // other classes in this package -- TODO confirm.
    static final int EQUAL = 0;

    static final int GREATER = 1;

    static final int LESS = -1;

    /**
     * Constant used to specify the decomposition rule: no decomposition is
     * applied. As described in the class documentation, the caller is then
     * responsible for supplying text that is already in the appropriate form.
     */
    public static final int NO_DECOMPOSITION = 0;

    /**
     * Constant used to specify the decomposition rule: canonical
     * decomposition. As described in the class documentation, un-normalized
     * text then produces the same results as if it were normalized in NFD.
     */
    public static final int CANONICAL_DECOMPOSITION = 1;

    /**
     * Constant used to specify the decomposition rule (the compatibility
     * decomposition mode). This value for decomposition is not supported.
     */
    public static final int FULL_DECOMPOSITION = 2;

    /**
     * Constant used to specify the collation strength. At this strength
     * differences between base characters are considered (see the class
     * documentation).
     */
    public static final int PRIMARY = 0;

    /**
     * Constant used to specify the collation strength. At this strength
     * accent differences are considered in addition to primary differences
     * (see the class documentation).
     */
    public static final int SECONDARY = 1;

    /**
     * Constant used to specify the collation strength. At this strength
     * upper and lower case differences are considered in addition to primary
     * and secondary differences (see the class documentation).
     */
    public static final int TERTIARY = 2;

    /**
     * Constant used to specify the collation strength. At this strength the
     * code point values of the NFD form are used as a tiebreaker when all
     * other strengths are equal (see the class documentation).
     */
    public static final int IDENTICAL = 3;

    /**
     * Capacity used when the {@code collator.cache} system property is
     * missing, not a number, or negative.
     */
    private static final int DEFAULT_CACHE_SIZE = 6;

    /** Initial capacity handed to the {@code cache} vector below. */
    private static int CACHE_SIZE;

    static {
        // The property is read inside doPrivileged so that the lookup runs
        // with this class's own permissions rather than the caller's.
        String cacheSize = AccessController
                .doPrivileged(new PrivilegedAction<String>() {
                    public String run() {
                        return System.getProperty("collator.cache"); //$NON-NLS-1$
                    }
                });
        int size = DEFAULT_CACHE_SIZE;
        if (cacheSize != null) {
            try {
                int parsed = Integer.parseInt(cacheSize);
                // A negative capacity would make the Vector constructor throw
                // IllegalArgumentException and break class initialization, so
                // such values are treated like an unparsable property.
                if (parsed >= 0) {
                    size = parsed;
                }
            } catch (NumberFormatException ignored) {
                // Not a number: keep the default capacity.
            }
        }
        CACHE_SIZE = size;
    }

    private static Vector<Collator> cache = new Vector<Collator>(CACHE_SIZE);

    // The ICU4JNI collator wrapped by this instance; the strength and
    // decomposition accessors of this class forward to it.
    com.ibm.icu4jni.text.Collator icuColl;

    /**
     * Constructs a {@code Collator} that wraps the given ICU4JNI collator.
     *
     * @param wrapper
     *            the ICU4JNI collator to store in {@code icuColl}.
     */
    Collator(com.ibm.icu4jni.text.Collator wrapper) {
        icuColl = wrapper;
    }

    /**
     * Constructs a new {@code Collator} instance.
     */
    protected Collator() {
        super();
        // BEGIN android-added
        icuColl = com.ibm.icu4jni.text.Collator.getInstance(Locale.getDefault());
        // END android-added
    }

    /**
     * Creates a copy of this collator. The copy is produced by
     * {@code Object.clone()} and then receives its own clone of the wrapped
     * ICU4JNI collator.
     *
     * @return the copy of this collator.
     * @see java.lang.Cloneable
     */
    @Override
    public Object clone() {
        try {
            final Collator copy = (Collator) super.clone();
            final Object icuCopy = icuColl.clone();
            copy.icuColl = (com.ibm.icu4jni.text.Collator) icuCopy;
            return copy;
        } catch (CloneNotSupportedException cnse) {
            // This class implements Cloneable, so this is not expected.
            throw new AssertionError(cnse); // android-changed
        }
    }

    /**
     * Compares two objects to determine their relative order. Both objects
     * are cast to {@code String} and handed to
     * {@link #compare(String, String)}.
     *
     * @param object1
     *            the first string to compare.
     * @param object2
     *            the second string to compare.
     * @return a negative value if {@code object1} is less than {@code object2},
     *         0 if they are equal, and a positive value if {@code object1} is
     *         greater than {@code object2}.
     * @throws ClassCastException
     *         if {@code object1} or {@code object2} is not a {@code String}.
     */
    public int compare(Object object1, Object object2) {
        final String first = (String) object1;
        final String second = (String) object2;
        return compare(first, second);
    }

    /**
     * Compares two strings to determine their relative order. Subclasses
     * supply the actual comparison; {@link #compare(Object, Object)} and
     * {@link #equals(String, String)} both delegate to this method.
     * 
     * @param string1
     *            the first string to compare.
     * @param string2
     *            the second string to compare.
     * @return a negative value if {@code string1} is less than {@code string2},
     *         0 if they are equal and a positive value if {@code string1} is
     *         greater than {@code string2}.
     */
    public abstract int compare(String string1, String string2);

    /**
     * Compares this collator with the specified object and indicates if they
     * are equal. Two collators are equal when their wrapped ICU4JNI collators
     * are both {@code null} or are equal according to the wrapped collator's
     * own {@code equals} method (presumably this covers the strength and
     * decomposition values; that is decided by the ICU4JNI implementation).
     *
     * @param object
     *            the object to compare with this object.
     * @return {@code true} if {@code object} is a {@code Collator} whose
     *         wrapped collator equals this one's; {@code false} otherwise.
     * @see #hashCode
     */
    @Override
    public boolean equals(Object object) {
        if (object instanceof Collator) {
            final com.ibm.icu4jni.text.Collator other = ((Collator) object).icuColl;
            if (icuColl == null) {
                return other == null;
            }
            return icuColl.equals(other);
        }
        return false;
    }

    /**
     * Determines whether two strings are equal under this collator's rules,
     * that is, whether {@link #compare(String, String)} returns 0 for them.
     *
     * @param string1
     *            the first string to compare.
     * @param string2
     *            the second string to compare.
     * @return {@code true} if {@code string1} and {@code string2} are equal
     *         using the collation rules, false otherwise.
     */
    public boolean equals(String string1, String string2) {
        final int order = compare(string1, string2);
        return order == 0;
    }

    /**
     * Gets the list of installed {@link java.util.Locale} objects which support
     * {@code Collator}. The list is obtained from the ICU4JNI collator class.
     *
     * @return an array of {@code Locale}.
     */
    public static Locale[] getAvailableLocales() {
        final Locale[] locales = com.ibm.icu4jni.text.Collator.getAvailableLocales();
        return locales;
    }

    /**
     * Returns a {@link CollationKey} for the specified string for this collator
     * with the current decomposition rule and strength value. The
     * implementation is supplied by subclasses.
     * 
     * @param string
     *            the source string that is converted into a collation key.
     * @return the collation key for {@code string}.
     */
    public abstract CollationKey getCollationKey(String string);

    /**
     * Returns the decomposition rule for this collator, read from the wrapped
     * ICU4JNI collator and translated to this class's constants.
     *
     * @return the decomposition rule, either {@code NO_DECOMPOSITION} or
     *         {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is
     *         not supported.
     */
    public int getDecomposition() {
        final int icuMode = icuColl.getDecomposition();
        return decompositionMode_ICU_Java(icuMode);
    }

    /**
     * Returns a {@code Collator} instance which is appropriate for the default
     * {@code Locale}. Equivalent to calling {@link #getInstance(Locale)} with
     * {@code Locale.getDefault()}.
     *
     * @return the collator for the default locale.
     */
    public static Collator getInstance() {
        final Locale defaultLocale = Locale.getDefault();
        return getInstance(defaultLocale);
    }

    /**
     * Returns a {@code Collator} instance which is appropriate for the
     * specified {@code Locale}. Every call creates a new
     * {@code RuleBasedCollator} wrapping the ICU4JNI collator for
     * {@code locale}.
     * 
     * @param locale
     *            the locale.
     * @return the collator for {@code locale}.
     * @throws NullPointerException
     *             if {@code locale} is {@code null}.
     */
    public static Collator getInstance(Locale locale) {
        // The previous cache lookup was removed: the cache is never populated
        // and it compared Collator elements against a String key, so it could
        // never produce a hit. A null locale still fails with a
        // NullPointerException, as it did when the key was computed.
        if (locale == null) {
            throw new NullPointerException("locale == null"); //$NON-NLS-1$
        }
        return new RuleBasedCollator(com.ibm.icu4jni.text.Collator
                .getInstance(locale));
    }

    /**
     * Returns the strength value for this collator, read from the wrapped
     * ICU4JNI collator and translated to this class's constants.
     *
     * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or
     *         IDENTICAL.
     */
    public int getStrength() {
        final int icuStrength = icuColl.getStrength();
        return strength_ICU_Java(icuStrength);
    }

    /**
     * Returns an integer hash code for this collator. Declared abstract here,
     * so each concrete subclass has to provide the implementation.
     * NOTE(review): {@link #equals(Object)} compares the wrapped ICU4JNI
     * collators, so implementations presumably need to be consistent with
     * that -- confirm against the subclasses.
     * 
     * @return this collator's hash code.
     * 
     * @see #equals(Object)
     * @see #equals(String, String)
     */
    @Override
    public abstract int hashCode();

    /**
     * Sets the decomposition rule for this collator. The value is translated
     * to the ICU4JNI constant and applied to the wrapped collator.
     *
     * @param value
     *            the decomposition rule, either {@code NO_DECOMPOSITION} or
     *            {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION}
     *            is not supported.
     * @throws IllegalArgumentException
     *            if the provided decomposition rule is not valid. This includes
     *            {@code FULL_DECOMPOSITION}.
     */
    public void setDecomposition(int value) {
        final int icuMode = decompositionMode_Java_ICU(value);
        icuColl.setDecomposition(icuMode);
    }

    /**
     * Sets the strength value for this collator. The value is translated to
     * the ICU4JNI constant and applied to the wrapped collator.
     *
     * @param value
     *            the strength value, either PRIMARY, SECONDARY, TERTIARY, or
     *            IDENTICAL.
     * @throws IllegalArgumentException
     *            if the provided strength value is not valid.
     */
    public void setStrength(int value) {
        final int icuStrength = strength_Java_ICU(value);
        icuColl.setStrength(icuStrength);
    }

    /**
     * Translates a decomposition mode constant of this class into the
     * corresponding ICU4JNI constant.
     *
     * @param mode
     *            {@code NO_DECOMPOSITION} or {@code CANONICAL_DECOMPOSITION}.
     * @return the matching ICU4JNI decomposition constant.
     * @throws IllegalArgumentException
     *             if {@code mode} is any other value, including
     *             {@code FULL_DECOMPOSITION}.
     */
    private int decompositionMode_Java_ICU(int mode) {
        switch (mode) {
            case Collator.CANONICAL_DECOMPOSITION:
                return com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION;
            case Collator.NO_DECOMPOSITION:
                return com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION;
            default:
                // Unknown values used to be handed to ICU unchanged, where a
                // number that happens to match a raw ICU constant could be
                // accepted; reject them here as setDecomposition documents.
                throw new IllegalArgumentException(
                        "Invalid decomposition mode: " + mode); //$NON-NLS-1$
        }
    }

    /**
     * Translates an ICU4JNI decomposition constant into the corresponding
     * constant of this class. Values that match neither known ICU4JNI
     * constant are returned unchanged.
     *
     * @param mode
     *            the ICU4JNI decomposition mode.
     * @return {@code NO_DECOMPOSITION}, {@code CANONICAL_DECOMPOSITION}, or
     *         {@code mode} itself when it is not recognised.
     */
    private int decompositionMode_ICU_Java(int mode) {
        switch (mode) {
            case com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION:
                return Collator.NO_DECOMPOSITION;
            case com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION:
                return Collator.CANONICAL_DECOMPOSITION;
            default:
                return mode;
        }
    }

    /**
     * Translates a strength constant of this class into the corresponding
     * ICU4JNI constant.
     *
     * @param value
     *            one of {@code PRIMARY}, {@code SECONDARY}, {@code TERTIARY}
     *            or {@code IDENTICAL}.
     * @return the matching ICU4JNI strength constant.
     * @throws IllegalArgumentException
     *             if {@code value} is not one of the four strength constants.
     */
    private int strength_Java_ICU(int value) {
        switch (value) {
            case Collator.PRIMARY:
                return com.ibm.icu4jni.text.Collator.PRIMARY;
            case Collator.SECONDARY:
                return com.ibm.icu4jni.text.Collator.SECONDARY;
            case Collator.TERTIARY:
                return com.ibm.icu4jni.text.Collator.TERTIARY;
            case Collator.IDENTICAL:
                return com.ibm.icu4jni.text.Collator.IDENTICAL;
            default:
                // Unknown values used to be handed to ICU unchanged, where a
                // number that happens to match a raw ICU constant could be
                // accepted; reject them here as setStrength documents.
                throw new IllegalArgumentException(
                        "Invalid strength: " + value); //$NON-NLS-1$
        }
    }

    /**
     * Translates an ICU4JNI strength constant into the corresponding constant
     * of this class. Values that match none of the four known ICU4JNI
     * constants are returned unchanged.
     *
     * @param value
     *            the ICU4JNI strength value.
     * @return {@code PRIMARY}, {@code SECONDARY}, {@code TERTIARY},
     *         {@code IDENTICAL}, or {@code value} itself when it is not
     *         recognised.
     */
    private int strength_ICU_Java(int value) {
        switch (value) {
            case com.ibm.icu4jni.text.Collator.PRIMARY:
                return Collator.PRIMARY;
            case com.ibm.icu4jni.text.Collator.SECONDARY:
                return Collator.SECONDARY;
            case com.ibm.icu4jni.text.Collator.TERTIARY:
                return Collator.TERTIARY;
            case com.ibm.icu4jni.text.Collator.IDENTICAL:
                return Collator.IDENTICAL;
            default:
                return value;
        }
    }
}