/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Nov 30, 2007
*/
package com.bigdata.btree.keys;
import java.util.Locale;
import org.apache.log4j.Logger;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
/**
* <p>
* Implementation that uses the ICU library (which supports compressed sort
* keys).
* </p>
* <p>
* This class uses <a href="http://icu.sourceforge.net">ICU4J</a>. There are
* several advantages to the ICU libraries: (1) the collation keys are
* compressed; (2) the libraries are faster than the jdk classes; (3) the
* libraries support Unicode 5; and (4) the libraries have the same behavior
* under Java and C/C++ so you can have interoperable code. There is also a JNI
* (Java Native Interface) implementation for many platforms for even greater
* performance and compatibility.
* </p>
* <p>
* Note: The sort keys generated by the ICU libraries critically depend on the
* {@link Locale} and MAY be different depending on the software version of the
* ICU libraries. ICU makes available a version number that is changed each time
* a software revision would result in a change in the generated sort order.
* Further note that the sort keys generated by the ICU libraries are NOT
* compatible with those generated by the JDK {@link java.text.Collator}
* classes.
* </p>
* <p>
* Note: This is a top-level class in order to avoid runtime dependencies on the
* ICU libraries when they are not required.
* </p>
*
* @see <a href="http://site.icu-project.org/">http://site.icu-project.org/</a>
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
class ICUSortKeyGenerator implements UnicodeSortKeyGenerator {

    private static final Logger log = Logger.getLogger(ICUSortKeyGenerator.class);

    /**
     * Used to encode unicode strings into compact byte[]s that have the same
     * sort order (aka sort keys).
     */
    private final RuleBasedCollator collator;

    /**
     * The {@link Locale} used to configure this object.
     */
    private final Locale locale;

    /**
     * Buffer is reused for each {@link String} from which a sort key is
     * derived. Because this single buffer is overwritten by every call to
     * {@link #appendSortKey(KeyBuilder, String)} without any synchronization,
     * an instance must not be used by more than one thread at a time.
     */
    private final RawCollationKey raw = new RawCollationKey(128);

    /**
     * The {@link Locale} used to configure this object.
     */
    public Locale getLocale() {
        return locale;
    }

    /**
     * Creates a sort key generator backed by the ICU collator for the given
     * locale, optionally overriding the collator's strength and decomposition
     * mode.
     *
     * @param locale
     *            The locale whose collation rules are used (required).
     * @param strength
     *            Either an {@link Integer} that is passed through to the
     *            collator as-is, a {@link StrengthEnum}, or <code>null</code>
     *            to keep the strength of the collator returned for the
     *            locale. Any other type fails on the cast to
     *            {@link StrengthEnum}.
     * @param mode
     *            The decomposition mode, or <code>null</code> to keep the
     *            decomposition mode of the collator returned for the locale.
     *
     * @throws IllegalArgumentException
     *             if <i>locale</i> is <code>null</code>.
     * @throws UnsupportedOperationException
     *             if <i>strength</i> or <i>mode</i> is an enum value that this
     *             class does not know how to translate.
     */
    ICUSortKeyGenerator(final Locale locale, final Object strength, final DecompositionEnum mode) {

        if (locale == null)
            throw new IllegalArgumentException();

        this.locale = locale;

        if (log.isInfoEnabled())
            log.info("locale=" + locale);

        this.collator = (RuleBasedCollator) Collator.getInstance(locale);

        if (strength != null) {

            if (strength instanceof Integer) {

                // Raw ICU strength constant supplied by the caller.
                final int str = ((Integer) strength).intValue();

                if (log.isInfoEnabled())
                    log.info("strength=" + str);

                collator.setStrength(str);

            } else {

                final StrengthEnum str = (StrengthEnum) strength;

                if (log.isInfoEnabled())
                    log.info("strength=" + str);

                collator.setStrength(toIcuStrength(str));

            }

        }

        if (mode != null) {

            if (log.isInfoEnabled())
                log.info("mode=" + mode);

            collator.setDecomposition(toIcuDecomposition(mode));

        }

    }

    /**
     * Translates a {@link StrengthEnum} into the corresponding ICU
     * {@link Collator} strength constant.
     */
    private static int toIcuStrength(final StrengthEnum strength) {

        switch (strength) {
        case Primary:
            return Collator.PRIMARY;
        case Secondary:
            return Collator.SECONDARY;
        case Tertiary:
            return Collator.TERTIARY;
        case Quaternary:
            return Collator.QUATERNARY;
        case Identical:
            return Collator.IDENTICAL;
        default:
            throw new UnsupportedOperationException("strength="
                    + strength);
        }

    }

    /**
     * Translates a {@link DecompositionEnum} into the corresponding ICU
     * {@link Collator} decomposition constant.
     */
    private static int toIcuDecomposition(final DecompositionEnum mode) {

        switch (mode) {
        case None:
            return Collator.NO_DECOMPOSITION;
        case Full:
            return Collator.FULL_DECOMPOSITION;
        case Canonical:
            return Collator.CANONICAL_DECOMPOSITION;
        default:
            throw new UnsupportedOperationException("mode=" + mode);
        }

    }

    /**
     * Computes the ICU sort key for <i>s</i> and appends it, without its
     * trailing nul byte, to <i>keyBuilder</i>.
     *
     * @param keyBuilder
     *            The buffer onto which the sort key is appended.
     * @param s
     *            The string whose sort key is generated (required).
     *
     * @throws IllegalArgumentException
     *             if <i>s</i> is <code>null</code>.
     */
    public void appendSortKey(final KeyBuilder keyBuilder, final String s) {

        /*
         * Guard against a null source. The reusable buffer is appended
         * unconditionally below, so if the collator does not refresh it (ICU4J
         * is understood to return null for a null source without touching the
         * supplied key) the bytes left over from the previous call - or a
         * negative length on a fresh instance - would be passed to the key
         * builder.
         */
        if (s == null)
            throw new IllegalArgumentException("s is null");

        // Fill the shared buffer rather than allocating a key per call.
        collator.getRawCollationKey(s, raw);

        keyBuilder.append(raw.bytes, 0, raw.size - 1/*
                                                     * do not include the nul
                                                     * byte
                                                     */);

    }

}