/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Apr 30, 2007
 */

package com.bigdata.btree.keys;

import java.util.Arrays;
import java.util.Locale;
import java.util.Properties;

import com.bigdata.btree.keys.KeyBuilder.Options;
import com.bigdata.util.BytesUtil;
import com.ibm.icu.text.Collator;

/**
 * Tests for Unicode support in {@link KeyBuilder}.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class TestICUUnicodeKeyBuilder extends AbstractUnicodeKeyBuilderTestCase {

    /**
     * Creates an unnamed test case.
     */
    public TestICUUnicodeKeyBuilder() {
    }

    /**
     * Creates a named test case.
     *
     * @param arg0
     *            The name passed through to the superclass constructor.
     */
    public TestICUUnicodeKeyBuilder(String arg0) {
        super(arg0);
    }

    /**
     * Returns the inherited properties layered under an override which
     * selects the ICU collator.
     *
     * @return A new {@link Properties} object whose defaults are the
     *         superclass properties and whose {@link Options#COLLATOR} is
     *         {@link CollatorEnum#ICU}.
     */
    @Override
    public Properties getProperties() {

        final Properties properties = new Properties(super.getProperties());

        properties.setProperty(Options.COLLATOR, CollatorEnum.ICU.toString());

        return properties;

    }

    /**
     * Verifies that a {@link KeyBuilder} created from {@link #getProperties()}
     * uses an {@link ICUSortKeyGenerator}.
     */
    public void test_correctCollator() {

        final Properties properties = getProperties();

        if (log.isInfoEnabled())
            log.info("properties=" + properties);

        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
                .newUnicodeInstance(properties);

        assertEquals(ICUSortKeyGenerator.class, keyBuilder
                .getSortKeyGenerator().getClass());

    }

    /**
     * Test examines the behavior when the
     * {@link SuccessorUtil#successor(String)} of a Unicode string is formed by
     * appending a <code>nul</code> character and reports an error if the
     * resulting byte[] (when the keys are formed) compares as equal to the
     * key for the original string from which the successor was formed.
     * <p>
     * Each collator strength is tried in ascending order. The test fails only
     * if NO strength differentiates the trailing nul; the weakest strength
     * which does is reported on stderr.
     * <p>
     * Note: Since {@link Collator#IDENTICAL} appears to be required to
     * differentiate a trailing nul character (i.e., the successor of some
     * Unicode string), then I would strongly recommend that you form the sort
     * key first and then its successor (by appending a trailing nul).
     */
    public void test_keyBuilder_unicode_trailingNuls() {

        /*
         * Setup for US English.
         */

        final Properties properties = new Properties();

        properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());

        properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());

        // Note: MUST be listed from weakest to strongest (see below).
        final int[] strengths = new int[] {
                Collator.PRIMARY,
                Collator.SECONDARY,
                Collator.TERTIARY,
                Collator.QUATERNARY,
                Collator.IDENTICAL,
        };

        // -1 until some strength is found which differentiates the nul.
        int minStrength = -1;

        for (int i = 0; i < strengths.length; i++) {

            final int strength = strengths[i];

            /*
             * Set the strength under test. The collator is configured by the
             * key builder factory from this property, so the loop variable
             * (not a fixed constant) must be written here or every pass would
             * exercise the same configuration.
             */
            properties.setProperty(Options.STRENGTH, "" + strength);

            if (!doSuccessorTest("Hello World!", properties)) {

                log.warn("Collator does not differentiate trailing nul characters at strength="
                        + strength);

            } else if (minStrength == -1) {

                /*
                 * Only the first success is recorded. Since the strengths are
                 * visited in ascending order this is the minimum strength.
                 */
                minStrength = strength;

            }

        }

        assertFalse(
                "Collator will not differentiate trailing nul characters at any strength.",
                minStrength == -1);

        System.err
                .println("Minimum strength ("+minStrength+") to differentiate trailing nul character is: "
                        + strengthName(minStrength));

    }

    /**
     * Returns the symbolic name of an ICU collator strength.
     *
     * @param strength
     *            One of the {@link Collator} strength constants.
     *
     * @return The name of the constant -or- the decimal value of
     *         <i>strength</i> if it is not one of the five strengths
     *         exercised by this test.
     */
    private static String strengthName(final int strength) {

        switch (strength) {
        case Collator.PRIMARY:
            return "PRIMARY";
        case Collator.SECONDARY:
            return "SECONDARY";
        case Collator.TERTIARY:
            return "TERTIARY";
        case Collator.QUATERNARY:
            return "QUATERNARY";
        case Collator.IDENTICAL:
            return "IDENTICAL";
        default:
            return "" + strength;
        }

    }

    /**
     * Test whether or not the {@link Collator} will differentiate Unicode
     * strings that differ only in a trailing <code>nul</code> character.
     *
     * @param s
     *            The Unicode string.
     * @param properties
     *            The properties used to configure the
     *            {@link DefaultKeyBuilderFactory}. They are expected to
     *            select the ICU collator for the US English locale (this is
     *            asserted) and to specify the {@link Options#STRENGTH} under
     *            test.
     *
     * @return True iff the collator differentiates between the string and its
     *         successor (formed by appending a nul character) in its generated
     *         sort keys.
     */
    protected boolean doSuccessorTest(String s, Properties properties) {

        final DefaultKeyBuilderFactory factory = new DefaultKeyBuilderFactory(
                properties);

        // Verify the factory was configured as this test expects.
        assertEquals(CollatorEnum.ICU, factory.getCollator());

        assertEquals(Locale.US.getLanguage(), factory.getLocale().getLanguage());

        assertEquals(Locale.US.getCountry(), factory.getLocale().getCountry());

        final IKeyBuilder keyBuilder = factory.getKeyBuilder();

        final String successor = SuccessorUtil.successor(s);

        // the successor is one character longer.
        assertEquals(s.length() + 1, successor.length());

        final byte[] key1 = keyBuilder.reset().append(s).getKey();

        final byte[] key2 = keyBuilder.reset().append(successor).getKey();

        // key1 MUST order less than key2.
        final int ret = BytesUtil.compareBytes(key1, key2);

        if (ret >= 0) {

            log
                    .warn("Key1 does NOT order less than successor(key1) : comparator returns "
                            + ret);

            // Dump the inputs and both keys to aid diagnosis.
            System.err.println("text=" + s);
            System.err.println("strength="
                    + properties.getProperty(Options.STRENGTH));
            System.err.println("key1: " + Arrays.toString(key1));
            System.err.println("key2: " + Arrays.toString(key2));

            return false;

        }

        return true;

    }

}