/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package java.text; import libcore.icu.RuleBasedCollatorICU; /** * A concrete subclass of {@link Collator}. * It is based on the ICU RuleBasedCollator which implements the * CLDR and Unicode collation algorithms. * * <p>Most of the time, you create a {@link Collator} instance for a {@link java.util.Locale} * by calling the {@link Collator#getInstance} factory method. * You can construct a {@code RuleBasedCollator} if you need a custom sort order. * * <p>The root collator's sort order is the CLDR root collation order * which in turn is the Unicode default sort order with a few modifications. * A {@code RuleBasedCollator} is built from a rule {@code String} which changes the * sort order of some characters and strings relative to the default order. * * <p>A rule string usually contains one or more rule chains. * A rule chain consists of a reset followed by one or more rules. * The reset anchors the following rules in the default sort order. * The rules change the order of the their characters and strings * relative to the reset point. * * <p>A reset is an ampersand {@code &} followed by one or more characters for the reset position. * A rule is a relation operator, which specifies the level of difference, * also followed by one or more characters. * A multi-character rule creates a "contraction". * A multi-character reset position usually creates "expansions". * * <p>For example, the following rules * make "ä" sort with a diacritic-like (secondary) difference from "ae" * (like in German phonebook sorting), * and make "å" and "aa" sort as a base letter (primary) after "z" (like in Danish). * Uppercase forms sort with a case-like (tertiary) difference after their lowercase forms. * * <blockquote> * <pre> * &AE<<ä <<<Ä * &z<å<<<Å<<<aa<<<Aa<<<AA * </pre> * </blockquote> * * <p>For details see * <ul> * <li>CLDR <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Rules">Collation Rule Syntax</a> * <li>ICU User Guide <a href="http://userguide.icu-project.org/collation/customization">Collation Customization</a> * </ul> * * <p>Note: earlier versions of {@code RuleBasedCollator} up to and including Android 4.4 (KitKat) * allowed the omission of the reset from the first rule chain. * This was interpreted as an implied reset after the last non-Han script in the default order. * However, this is not a useful reset position, except for large tailorings of * Han characters themselves. * Starting with the CLDR 24 collation specification and the ICU 53 implementation, * the initial reset is required. * * <p>If the rule string does not follow the syntax, then {@code RuleBasedCollator} throws a * {@code ParseException}. */ public class RuleBasedCollator extends Collator { RuleBasedCollator(RuleBasedCollatorICU wrapper) { super(wrapper); } /** * Constructs a new instance of {@code RuleBasedCollator} using the * specified {@code rules}. (See the {@link RuleBasedCollator class description}.) * <p> * Note that the {@code rules} are interpreted as a delta to the * default sort order. This differs * from other implementations which work with full {@code rules} * specifications and may result in different behavior. * * @param rules * the collation rules. * @throws NullPointerException * if {@code rules == null}. * @throws ParseException * if {@code rules} contains rules with invalid collation rule * syntax. */ public RuleBasedCollator(String rules) throws ParseException { if (rules == null) { throw new NullPointerException("rules == null"); } try { icuColl = new RuleBasedCollatorICU(rules); } catch (Exception e) { if (e instanceof ParseException) { throw (ParseException) e; } /* * -1 means it's not a ParseException. Maybe IOException thrown when * an error occurred while reading internal data. */ throw new ParseException(e.getMessage(), -1); } } /** * Obtains a {@code CollationElementIterator} for the given * {@code CharacterIterator}. The source iterator's integrity will be * preserved since a new copy will be created for use. * * @param source * the source character iterator. * @return a {@code CollationElementIterator} for {@code source}. */ public CollationElementIterator getCollationElementIterator(CharacterIterator source) { if (source == null) { throw new NullPointerException("source == null"); } return new CollationElementIterator(icuColl.getCollationElementIterator(source)); } /** * Obtains a {@code CollationElementIterator} for the given string. * * @param source * the source string. * @return the {@code CollationElementIterator} for {@code source}. */ public CollationElementIterator getCollationElementIterator(String source) { if (source == null) { throw new NullPointerException("source == null"); } return new CollationElementIterator(icuColl.getCollationElementIterator(source)); } /** * Returns the collation rules of this collator. These {@code rules} can be * fed into the {@code RuleBasedCollator(String)} constructor. * * <p>The returned string will be empty unless you constructed the instance yourself. * The string forms of the collation rules are omitted to save space on the device. */ public String getRules() { return icuColl.getRules(); } /** * Returns a new collator with the same collation rules, decomposition mode and * strength value as this collator. * * @return a shallow copy of this collator. * @see java.lang.Cloneable */ @Override public Object clone() { RuleBasedCollator clone = (RuleBasedCollator) super.clone(); return clone; } /** * Compares the {@code source} text to the {@code target} text according to * the collation rules, strength and decomposition mode for this * {@code RuleBasedCollator}. See the {@code Collator} class description * for an example of use. * * @param source * the source text. * @param target * the target text. * @return an integer which may be a negative value, zero, or else a * positive value depending on whether {@code source} is less than, * equivalent to, or greater than {@code target}. */ @Override public int compare(String source, String target) { if (source == null) { throw new NullPointerException("source == null"); } else if (target == null) { throw new NullPointerException("target == null"); } return icuColl.compare(source, target); } /** * Returns the {@code CollationKey} for the given source text. * * @param source * the specified source text. * @return the {@code CollationKey} for the given source text. */ @Override public CollationKey getCollationKey(String source) { return icuColl.getCollationKey(source); } @Override public int hashCode() { return icuColl.getRules().hashCode(); } /** * Compares the specified object with this {@code RuleBasedCollator} and * indicates if they are equal. In order to be equal, {@code object} must be * an instance of {@code Collator} with the same collation rules and the * same attributes. * * @param obj * the object to compare with this object. * @return {@code true} if the specified object is equal to this * {@code RuleBasedCollator}; {@code false} otherwise. * @see #hashCode */ @Override public boolean equals(Object obj) { if (!(obj instanceof Collator)) { return false; } return super.equals(obj); } }