/** ******************************************************************************* * Copyright (C) 1996-2005, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * ******************************************************************************* */ package com.ibm.icu4jni.text; import java.util.Locale; import com.ibm.icu4jni.text.RuleBasedCollator; /** * Abstract class handling locale specific collation via JNI and ICU. * Subclasses implement specific collation strategies. One subclass, * com.ibm.icu4jni.text.RuleBasedCollator, is currently provided and is * applicable to a wide set of languages. Other subclasses may be created to * handle more specialized needs. * You can use the static factory method, getInstance(), to obtain the * appropriate Collator object for a given locale. * * <pre> * // Compare two strings in the default locale * Collator myCollator = Collator.getInstance(); * if (myCollator.compare("abc", "ABC") < 0) { * System.out.println("abc is less than ABC"); * } * else { * System.out.println("abc is greater than or equal to ABC"); * } * </pre> * * You can set a Collator's strength property to determine the level of * difference considered significant in comparisons. * Five strengths in CollationAttribute are provided: VALUE_PRIMARY, * VALUE_SECONDARY, VALUE_TERTIARY, VALUE_QUARTENARY and VALUE_IDENTICAL. * The exact assignment of strengths to language features is locale dependant. * For example, in Czech, "e" and "f" are considered primary differences, while * "e" and "?" latin small letter e with circumflex are secondary differences, * "e" and "E" are tertiary differences and "e" and "e" are identical. * * <p> * The following shows how both case and accents could be ignored for US * English. * <pre> * //Get the Collator for US English and set its strength to PRIMARY * Collator usCollator = Collator.getInstance(Locale.US); * usCollator.setStrength(Collator.PRIMARY); * if (usCollator.compare("abc", "ABC") == 0) { * System.out.println("Strings are equivalent"); * } * </pre> * For comparing Strings exactly once, the compare method provides the best * performance. * When sorting a list of Strings however, it is generally necessary to compare * each String multiple times. * In this case, com.ibm.icu4jni.text.CollationKey provide better performance. * The CollationKey class converts a String to a series of bits that can be * compared bitwise against other CollationKeys. * A CollationKey is created by a Collator object for a given String. * Note: CollationKeys from different Collators can not be compared. * </p> * * Considerations : * 1) ErrorCode not returned to user throw exceptions instead * 2) Similar API to java.text.Collator * @author syn wee quek * @stable ICU 2.4 */ public abstract class Collator implements Cloneable { // public data members --------------------------------------------------- /** * Strongest collator strength value. Typically used to denote differences * between base characters. See class documentation for more explanation. * @see #setStrength * @see #getStrength * @stable ICU 2.4 */ public final static int PRIMARY = CollationAttribute.VALUE_PRIMARY; /** * Second level collator strength value. * Accents in the characters are considered secondary differences. * Other differences between letters can also be considered secondary * differences, depending on the language. * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @stable ICU 2.4 */ public final static int SECONDARY = CollationAttribute.VALUE_SECONDARY; /** * Third level collator strength value. * Upper and lower case differences in characters are distinguished at this * strength level. In addition, a variant of a letter differs from the base * form on the tertiary level. * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @stable ICU 2.4 */ public final static int TERTIARY = CollationAttribute.VALUE_TERTIARY; /** * Fourth level collator strength value. * When punctuation is ignored * <a href="http://www-124.ibm.com/icu/userguide/Collate_Concepts.html#Ignoring_Punctuation"> * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY * strength, an additional strength level can * be used to distinguish words with and without punctuation. * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @stable ICU 2.4 */ public final static int QUATERNARY = CollationAttribute.VALUE_QUATERNARY; /** * <p> * Smallest Collator strength value. When all other strengths are equal, * the IDENTICAL strength is used as a tiebreaker. The Unicode code point * values of the NFD form of each string are compared, just in case there * is no difference. * See class documentation for more explanation. * </p> * <p> * Note this value is different from JDK's * </p> * @stable ICU 2.4 */ public final static int IDENTICAL = CollationAttribute.VALUE_IDENTICAL; /** * <p>Decomposition mode value. With NO_DECOMPOSITION set, Strings * will not be decomposed for collation. This is the default * decomposition setting unless otherwise specified by the locale * used to create the Collator.</p> * * <p><strong>Note</strong> this value is different from the JDK's.</p> * @see #CANONICAL_DECOMPOSITION * @see #getDecomposition * @see #setDecomposition * @stable ICU 2.4 */ public final static int NO_DECOMPOSITION = CollationAttribute.VALUE_OFF; /** * <p>Decomposition mode value. With CANONICAL_DECOMPOSITION set, * characters that are canonical variants according to the Unicode standard * will be decomposed for collation.</p> * * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as * described in <a href="http://www.unicode.org/unicode/reports/tr15/"> * Unicode Technical Report #15</a>. * </p> * @see #NO_DECOMPOSITION * @see #getDecomposition * @see #setDecomposition * @stable ICU 2.4 */ public final static int CANONICAL_DECOMPOSITION = CollationAttribute.VALUE_ON; // Collation result constants ----------------------------------- // corresponds to ICU's UCollationResult enum balues /** * string a == string b * @stable ICU 2.4 */ public static final int RESULT_EQUAL = 0; /** * string a > string b * @stable ICU 2.4 */ public static final int RESULT_GREATER = 1; /** * string a < string b * @stable ICU 2.4 */ public static final int RESULT_LESS = -1; /** * accepted by most attributes * @stable ICU 2.4 */ public static final int RESULT_DEFAULT = -1; // public methods ----------------------------------------------- /** * Factory method to create an appropriate Collator which uses the default * locale collation rules. * Current implementation createInstance() returns a RuleBasedCollator(Locale) * instance. The RuleBasedCollator will be created in the following order, * <ul> * <li> Data from argument locale resource bundle if found, otherwise * <li> Data from parent locale resource bundle of arguemtn locale if found, * otherwise * <li> Data from built-in default collation rules if found, other * <li> null is returned * </ul> * @return an instance of Collator * @stable ICU 2.4 */ public static Collator getInstance() { return getInstance(null); } /** * Factory method to create an appropriate Collator which uses the argument * locale collation rules.<br> * Current implementation createInstance() returns a RuleBasedCollator(Locale) * instance. The RuleBasedCollator will be created in the following order, * <ul> * <li> Data from argument locale resource bundle if found, otherwise * <li> Data from parent locale resource bundle of arguemtn locale if found, * otherwise * <li> Data from built-in default collation rules if found, other * <li> null is returned * </ul> * @param locale to be used for collation * @return an instance of Collator * @stable ICU 2.4 */ public static Collator getInstance(Locale locale) { RuleBasedCollator result = new RuleBasedCollator(locale); return result; } /** * Locale dependent equality check for the argument strings. * @param source string * @param target string * @return true if source is equivalent to target, false otherwise * @stable ICU 2.4 */ public boolean equals(String source, String target) { return (compare(source, target) == RESULT_EQUAL); } /** * Checks if argument object is equals to this object. * @param target object * @return true if source is equivalent to target, false otherwise * @stable ICU 2.4 */ public abstract boolean equals(Object target); /** * Makes a copy of the current object. * @return a copy of this object * @stable ICU 2.4 */ public abstract Object clone() throws CloneNotSupportedException; /** * The comparison function compares the character data stored in two * different strings. Returns information about whether a string is less * than, greater than or equal to another string. * <p>Example of use: * <pre> * . Collator myCollation = Collator.getInstance(Locale::US); * . myCollation.setStrength(CollationAttribute.VALUE_PRIMARY); * . // result would be CollationAttribute.VALUE_EQUAL * . // ("abc" == "ABC") * . // (no primary difference between "abc" and "ABC") * . int result = myCollation.compare("abc", "ABC",3); * . myCollation.setStrength(CollationAttribute.VALUE_TERTIARY); * . // result would be Collation.LESS (abc" <<< "ABC") * . // (with tertiary difference between "abc" and "ABC") * . int result = myCollation.compare("abc", "ABC",3); * </pre> * @param source source string. * @param target target string. * @return result of the comparison, Collator.RESULT_EQUAL, * Collator.RESULT_GREATER or Collator.RESULT_LESS * @stable ICU 2.4 */ public abstract int compare(String source, String target); /** * Get the decomposition mode of this Collator. * @return the decomposition mode * @see #CANONICAL_DECOMPOSITION * @see #NO_DECOMPOSITION * @stable ICU 2.4 */ public abstract int getDecomposition(); /** * Set the normalization mode used int this object * The normalization mode influences how strings are compared. * @param mode desired normalization mode * @see #CANONICAL_DECOMPOSITION * @see #NO_DECOMPOSITION * @stable ICU 2.4 */ public abstract void setDecomposition(int mode); /** * Determines the minimum strength that will be use in comparison or * transformation. * <p> * E.g. with strength == SECONDARY, the tertiary difference is ignored * </p> * <p> * E.g. with strength == PRIMARY, the secondary and tertiary difference * are ignored. * </p> * @return the current comparison level. * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY * @see #QUATERNARY * @see #IDENTICAL * @stable ICU 2.4 */ public abstract int getStrength(); /** * Gets the attribute to be used in comparison or transformation. * @param type the attribute to be set from CollationAttribute * @return value attribute value from CollationAttribute * @stable ICU 2.4 */ public abstract int getAttribute(int type); /** * Sets the minimum strength to be used in comparison or transformation. * <p>Example of use: * <pre> * . Collator myCollation = Collator.createInstance(Locale::US); * . myCollation.setStrength(PRIMARY); * . // result will be "abc" == "ABC" * . // tertiary differences will be ignored * . int result = myCollation->compare("abc", "ABC"); * </pre> * @param strength the new comparison level. * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY * @see #QUATERNARY * @see #IDENTICAL * @stable ICU 2.4 */ public abstract void setStrength(int strength); /** * Sets the attribute to be used in comparison or transformation. * <p>Example of use: * <pre> * . Collator myCollation = Collator.createInstance(Locale::US); * . myCollation.setAttribute(CollationAttribute.CASE_LEVEL, * . CollationAttribute.VALUE_ON); * . int result = myCollation->compare("\\u30C3\\u30CF", * . "\\u30C4\\u30CF"); * . // result will be Collator.RESULT_LESS. * </pre> * @param type the attribute to be set from CollationAttribute * @param value attribute value from CollationAttribute * @stable ICU 2.4 */ public abstract void setAttribute(int type, int value); /** * Get the sort key as an CollationKey object from the argument string. * To retrieve sort key in terms of byte arrays, use the method as below<br> * <code> * Collator collator = Collator.getInstance(); * CollationKey collationkey = collator.getCollationKey("string"); * byte[] array = collationkey.toByteArray(); * </code><br> * Byte array result are zero-terminated and can be compared using * java.util.Arrays.equals(); * @param source string to be processed. * @return the sort key * @stable ICU 2.4 */ public abstract CollationKey getCollationKey(String source); /** * Returns a hash of this collation object * @return hash of this collation object * @stable ICU 2.4 */ public abstract int hashCode(); // BEGIN android-added public static Locale[] getAvailableLocales() { String[] locales = NativeCollation.getAvailableLocalesImpl(); Locale[] result = new Locale[locales.length]; String locale; int index, index2; for(int i = 0; i < locales.length; i++) { locale = locales[i]; index = locale.indexOf('_'); index2 = locale.lastIndexOf('_'); if(index == -1) { result[i] = new Locale(locales[i]); } else if(index == 2 && index == index2) { result[i] = new Locale( locale.substring(0,2), locale.substring(3,5)); } else if(index == 2 && index2 > index) { result[i] = new Locale( locale.substring(0,index), locale.substring(index + 1,index2), locale.substring(index2 + 1)); } } return result; } // END android-added }