/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

// BEGIN android-note
// The icu implementation used was changed from icu4j to icu4jni.
// END android-note

package java.text;

import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Comparator;
import java.util.Locale;
import java.util.Vector;

/**
 * Performs locale-sensitive string comparison. A concrete subclass,
 * {@link RuleBasedCollator}, allows customization of the collation ordering by
 * the use of rule sets.
 * <p>
 * Following the <a href="http://www.unicode.org">Unicode Consortium</a>'s
 * specifications for the <a
 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
 * Algorithm (UCA)</a>, there are 4 different levels of strength used in
 * comparisons:
 * <ul>
 * <li>PRIMARY strength: Typically, this is used to denote differences between
 * base characters (for example, "a" &lt; "b"). It is the strongest difference.
 * For example, dictionaries are divided into different sections by base
 * character.
 * <li>SECONDARY strength: Accents in the characters are considered secondary
 * differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other differences
 * between letters can also be considered secondary differences, depending on
 * the language. A secondary difference is ignored when there is a primary
 * difference anywhere in the strings.
 * <li>TERTIARY strength: Upper and lower case differences in characters are
 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
 * "a&ograve;"). In addition, a variant of a letter differs from the base form
 * on the tertiary strength (such as "A" and "&#9398;"). Another example is the
 * difference between large and small Kana. A tertiary difference is ignored
 * when there is a primary or secondary difference anywhere in the strings.
 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL
 * strength is used as a tiebreaker. The Unicode code point values of the NFD
 * form of each string are compared, just in case there is no difference. For
 * example, Hebrew cantillation marks are only distinguished at this strength.
 * This strength should be used sparingly, as only code point value differences
 * between two strings are an extremely rare occurrence. Using this strength
 * substantially decreases the performance for both comparison and collation key
 * generation APIs. This strength also increases the size of the collation key.
 * </ul>
 * <p>
 * This {@code Collator} deals only with two decomposition modes, the canonical
 * decomposition mode and one that does not use any decomposition. The
 * compatibility decomposition mode
 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
 * canonical decomposition mode is set, {@code Collator} handles un-normalized
 * text properly, producing the same results as if the text were normalized in
 * NFD. If canonical decomposition is turned off, it is the user's
 * responsibility to ensure that all text is already in the appropriate form
 * before performing a comparison or before getting a {@link CollationKey}.
 * <p>
 * <em>Examples:</em>
 * <blockquote>
 *
 * <pre>
 * // Get the Collator for US English and set its strength to PRIMARY
 * Collator usCollator = Collator.getInstance(Locale.US);
 * usCollator.setStrength(Collator.PRIMARY);
 * if (usCollator.compare("abc", "ABC") == 0) {
 *     System.out.println("Strings are equivalent");
 * }
 * </pre>
 *
 * </blockquote>
 * <p>
 * The following example shows how to compare two strings using the collator for
 * the default locale.
 * <blockquote>
 *
 * <pre>
 * // Compare two strings in the default locale
 * Collator myCollator = Collator.getInstance();
 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
 *     System.out.println("\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition");
 *     myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 *     if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
 *         System.out.println("Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition");
 *     } else {
 *         System.out.println("\u00e0\u0325 is equal to a\u0325\u0300 with decomposition");
 *     }
 * } else {
 *     System.out.println("Error: \u00e0\u0325 should be not equal to a\u0325\u0300 without decomposition");
 * }
 * </pre>
 *
 * </blockquote>
 *
 * @see RuleBasedCollator
 * @see CollationKey
 */
public abstract class Collator implements Comparator<Object>, Cloneable {

    // Package-private comparison result codes; not referenced within this file.
    static final int EQUAL = 0;

    static final int GREATER = 1;

    static final int LESS = -1;

    /**
     * Constant used to specify the decomposition rule.
     */
    public static final int NO_DECOMPOSITION = 0;

    /**
     * Constant used to specify the decomposition rule.
     */
    public static final int CANONICAL_DECOMPOSITION = 1;

    /**
     * Constant used to specify the decomposition rule. This value for
     * decomposition is not supported.
     */
    public static final int FULL_DECOMPOSITION = 2;

    /**
     * Constant used to specify the collation strength.
     */
    public static final int PRIMARY = 0;

    /**
     * Constant used to specify the collation strength.
     */
    public static final int SECONDARY = 1;

    /**
     * Constant used to specify the collation strength.
     */
    public static final int TERTIARY = 2;

    /**
     * Constant used to specify the collation strength.
     */
    public static final int IDENTICAL = 3;

    // Wrapper class of ICU4JNI Collator
    com.ibm.icu4jni.text.Collator icuColl;

    /**
     * Constructs a {@code Collator} that delegates to the given ICU collator.
     *
     * @param wrapper
     *            the ICU collator to wrap; stored as-is, not copied.
     */
    Collator(com.ibm.icu4jni.text.Collator wrapper) {
        this.icuColl = wrapper;
    }

    /**
     * Constructs a new {@code Collator} instance backed by the ICU collator
     * for the default locale.
     */
    protected Collator() {
        super();
        // BEGIN android-added
        icuColl = com.ibm.icu4jni.text.Collator.getInstance(Locale.getDefault());
        // END android-added
    }

    /**
     * Returns a new collator with the same decomposition mode and
     * strength value as this collator.
     *
     * @return a copy of this collator that is backed by its own clone of the
     *         underlying ICU collator.
     * @see java.lang.Cloneable
     */
    @Override
    public Object clone() {
        try {
            Collator clone = (Collator) super.clone();
            // Clone the delegate as well so that changing the strength or
            // decomposition of the copy does not affect this instance.
            clone.icuColl = (com.ibm.icu4jni.text.Collator) this.icuColl.clone();
            return clone;
        } catch (CloneNotSupportedException e) {
            throw new AssertionError(e); // android-changed
        }
    }

    /**
     * Compares two objects to determine their relative order. The objects must
     * be strings.
     *
     * @param object1
     *            the first string to compare.
     * @param object2
     *            the second string to compare.
     * @return a negative value if {@code object1} is less than {@code object2},
     *         0 if they are equal, and a positive value if {@code object1} is
     *         greater than {@code object2}.
     * @throws ClassCastException
     *         if {@code object1} or {@code object2} is not a {@code String}.
     */
    public int compare(Object object1, Object object2) {
        return compare((String) object1, (String) object2);
    }

    /**
     * Compares two strings to determine their relative order.
     *
     * @param string1
     *            the first string to compare.
     * @param string2
     *            the second string to compare.
     * @return a negative value if {@code string1} is less than {@code string2},
     *         0 if they are equal and a positive value if {@code string1} is
     *         greater than {@code string2}.
     */
    public abstract int compare(String string1, String string2);

    /**
     * Compares this collator with the specified object and indicates if they
     * are equal.
     *
     * @param object
     *            the object to compare with this object.
     * @return {@code true} if {@code object} is a {@code Collator} object and
     *         it has the same strength and decomposition values as this
     *         collator; {@code false} otherwise.
     * @see #hashCode
     */
    @Override
    public boolean equals(Object object) {
        if (!(object instanceof Collator)) {
            return false;
        }
        Collator collator = (Collator) object;
        // Equality is delegated to the wrapped ICU collators; two collators
        // without a delegate are considered equal.
        if (this.icuColl == null) {
            return collator.icuColl == null;
        }
        return this.icuColl.equals(collator.icuColl);
    }

    /**
     * Compares two strings using the collation rules to determine if they are
     * equal.
     *
     * @param string1
     *            the first string to compare.
     * @param string2
     *            the second string to compare.
     * @return {@code true} if {@code string1} and {@code string2} are equal
     *         using the collation rules, false otherwise.
     */
    public boolean equals(String string1, String string2) {
        return compare(string1, string2) == 0;
    }

    /**
     * Gets the list of installed {@link java.util.Locale} objects which support
     * {@code Collator}.
     *
     * @return an array of {@code Locale}.
     */
    public static Locale[] getAvailableLocales() {
        return com.ibm.icu4jni.text.Collator.getAvailableLocales();
    }

    /**
     * Returns a {@link CollationKey} for the specified string for this collator
     * with the current decomposition rule and strength value.
     *
     * @param string
     *            the source string that is converted into a collation key.
     * @return the collation key for {@code string}.
     */
    public abstract CollationKey getCollationKey(String string);

    /**
     * Returns the decomposition rule for this collator.
     *
     * @return the decomposition rule, either {@code NO_DECOMPOSITION} or
     *         {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is
     *         not supported.
     */
    public int getDecomposition() {
        return fromIcuDecomposition(this.icuColl.getDecomposition());
    }

    /**
     * Returns a {@code Collator} instance which is appropriate for the default
     * {@code Locale}.
     *
     * @return the collator for the default locale.
     */
    public static Collator getInstance() {
        return getInstance(Locale.getDefault());
    }

    /**
     * Returns a {@code Collator} instance which is appropriate for the
     * specified {@code Locale}. A new instance is created on every call, so
     * the caller may freely change its strength or decomposition mode.
     *
     * @param locale
     *            the locale.
     * @return the collator for {@code locale}.
     * @throws NullPointerException
     *             if {@code locale} is {@code null}.
     */
    public static Collator getInstance(Locale locale) {
        if (locale == null) {
            throw new NullPointerException("locale == null");
        }
        return new RuleBasedCollator(
                com.ibm.icu4jni.text.Collator.getInstance(locale));
    }

    /**
     * Returns the strength value for this collator.
     *
     * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or
     *         IDENTICAL.
     */
    public int getStrength() {
        return fromIcuStrength(this.icuColl.getStrength());
    }

    /**
     * Returns an integer hash code for this collator.
     *
     * @return this collator's hash code.
     *
     * @see #equals(Object)
     * @see #equals(String, String)
     */
    @Override
    public abstract int hashCode();

    /**
     * Sets the decomposition rule for this collator.
     *
     * @param value
     *            the decomposition rule, either {@code NO_DECOMPOSITION} or
     *            {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION}
     *            is not supported.
     * @throws IllegalArgumentException
     *            if the provided decomposition rule is not valid. This includes
     *            {@code FULL_DECOMPOSITION}.
     */
    public void setDecomposition(int value) {
        this.icuColl.setDecomposition(toIcuDecomposition(value));
    }

    /**
     * Sets the strength value for this collator.
     *
     * @param value
     *            the strength value, either PRIMARY, SECONDARY, TERTIARY, or
     *            IDENTICAL.
     * @throws IllegalArgumentException
     *            if the provided strength value is not valid.
     */
    public void setStrength(int value) {
        this.icuColl.setStrength(toIcuStrength(value));
    }

    /**
     * Maps a {@code java.text.Collator} decomposition constant to the
     * corresponding ICU constant. Values that are not recognized are passed
     * through unchanged, leaving validation to the ICU collator.
     */
    private static int toIcuDecomposition(int mode) {
        switch (mode) {
        case Collator.CANONICAL_DECOMPOSITION:
            return com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION;
        case Collator.NO_DECOMPOSITION:
            return com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION;
        default:
            return mode;
        }
    }

    /**
     * Maps an ICU decomposition constant to the corresponding
     * {@code java.text.Collator} constant. Values that are not recognized are
     * returned unchanged.
     */
    private static int fromIcuDecomposition(int mode) {
        switch (mode) {
        case com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION:
            return Collator.NO_DECOMPOSITION;
        case com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION:
            return Collator.CANONICAL_DECOMPOSITION;
        default:
            return mode;
        }
    }

    /**
     * Maps a {@code java.text.Collator} strength constant to the corresponding
     * ICU constant. Values that are not recognized are passed through
     * unchanged, leaving validation to the ICU collator.
     */
    private static int toIcuStrength(int value) {
        switch (value) {
        case Collator.PRIMARY:
            return com.ibm.icu4jni.text.Collator.PRIMARY;
        case Collator.SECONDARY:
            return com.ibm.icu4jni.text.Collator.SECONDARY;
        case Collator.TERTIARY:
            return com.ibm.icu4jni.text.Collator.TERTIARY;
        case Collator.IDENTICAL:
            return com.ibm.icu4jni.text.Collator.IDENTICAL;
        default:
            return value;
        }
    }

    /**
     * Maps an ICU strength constant to the corresponding
     * {@code java.text.Collator} constant. Values that are not recognized are
     * returned unchanged.
     */
    private static int fromIcuStrength(int value) {
        switch (value) {
        case com.ibm.icu4jni.text.Collator.PRIMARY:
            return Collator.PRIMARY;
        case com.ibm.icu4jni.text.Collator.SECONDARY:
            return Collator.SECONDARY;
        case com.ibm.icu4jni.text.Collator.TERTIARY:
            return Collator.TERTIARY;
        case com.ibm.icu4jni.text.Collator.IDENTICAL:
            return Collator.IDENTICAL;
        default:
            return value;
        }
    }
}