/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Nov 29, 2007
*/
package com.bigdata.btree.keys;
import java.util.Arrays;
import java.util.Locale;
import java.util.Properties;
import junit.framework.TestCase2;
import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.StrengthEnum;
import com.bigdata.btree.keys.KeyBuilder.Options;
import com.bigdata.util.BytesUtil;
/**
 * Base class for the test suites that examine support for Unicode sort keys in
 * {@link KeyBuilder}.
 * <p>
 * Most tests obtain their configuration from <code>getProperties()</code>,
 * which is not declared in this class (presumably inherited from
 * {@link TestCase2} and overridden by the concrete test suites in order to
 * select the collator under test - confirm against the subclasses).
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public abstract class AbstractUnicodeKeyBuilderTestCase extends TestCase2 {

    /**
     * Creates an unnamed test case.
     */
    public AbstractUnicodeKeyBuilderTestCase() {
    }

    /**
     * Creates a named test case.
     *
     * @param arg0
     *            The name of the test, passed through to the super class.
     */
    public AbstractUnicodeKeyBuilderTestCase(String arg0) {
        super(arg0);
    }

    /**
     * Test ability to encode unicode data into a variable length byte[] that
     * allows direct byte-by-byte comparisons which maintain the
     * locale-specific sort order of the original strings.
     * <p>
     * The key builder is configured for US English with PRIMARY strength, so
     * strings which differ only in case must produce identical sort keys.
     */
    public void test_keyBuilder_unicode_string_key_us_primary() {

        /*
         * Setup an instance for US English with strength := PRIMARY.
         */
        final Properties properties = getProperties();

        properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());
        properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());
        properties.setProperty(Options.STRENGTH, StrengthEnum.Primary.toString());

        final DefaultKeyBuilderFactory factory = new DefaultKeyBuilderFactory(
                properties);

        // The factory must report the configuration that it was given.
        assertEquals(Locale.US.getLanguage(), factory.getLocale().getLanguage());
        assertEquals(Locale.US.getCountry(), factory.getLocale().getCountry());
        assertEquals(StrengthEnum.Primary, factory.getStrength());

        final IKeyBuilder keyBuilder = factory.getKeyBuilder();

//        // verify assumption under that configuration.
//        {
//            RuleBasedCollator usCollator = (RuleBasedCollator) Collator
//                    .getInstance(Locale.US);
//
//            usCollator.setStrength(Collator.PRIMARY);
//
//            assertEquals(0, usCollator.compare("abc", "ABC"));
//        }

        final byte[] key1 = keyBuilder.reset().append("abc").getKey();
        final byte[] key2 = keyBuilder.reset().append("ABC").getKey();
        final byte[] key3 = keyBuilder.reset().append("Abc").getKey();

        System.err.println("abc: "+BytesUtil.toString(key1));
        System.err.println("ABC: "+BytesUtil.toString(key2));
        System.err.println("Abc: "+BytesUtil.toString(key3));

        // all are equal using PRIMARY strength.
        assertEquals(0, BytesUtil.compareBytes(key1, key2));
        assertEquals(0, BytesUtil.compareBytes(key2, key3));

    }

    /**
     * Test verifies that sort keys generated for US English with IDENTICAL
     * strength distinguish strings which differ only in case, and checks the
     * relative order of the resulting keys ("abc" before "ABC", and "Abc"
     * before "ABC").
     */
    public void test_keyBuilder_unicode_string_key_us_identical() {

        /*
         * Setup an instance for US English with strength := IDENTICAL.
         *
         * NOTE(review): unlike the PRIMARY strength test, this starts from an
         * empty Properties object rather than getProperties() - confirm
         * whether that is intentional.
         */
        final Properties properties = new Properties();

        properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());
        properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());
        properties.setProperty(Options.STRENGTH, StrengthEnum.Identical.toString());

        final IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(properties);

//        // verify assumption under that configuration.
//        {
//            RuleBasedCollator usCollator = (RuleBasedCollator) Collator
//                    .getInstance(Locale.US);
//
//            usCollator.setStrength(Collator.IDENTICAL);
//
//            assertNotSame(0, usCollator.compare("abc", "ABC"));
//        }
//        IKeyBuilder keyBuilder = new UnicodeKeyBuilder(usCollator,1000);

        final byte[] key1 = keyBuilder.reset().append("abc").getKey();
        final byte[] key2 = keyBuilder.reset().append("ABC").getKey();
        final byte[] key3 = keyBuilder.reset().append("Abc").getKey();

        System.err.println("abc: "+BytesUtil.toString(key1));
        System.err.println("ABC: "+BytesUtil.toString(key2));
        System.err.println("Abc: "+BytesUtil.toString(key3));

        // verify ordering for IDENTICAL comparison.
        assertTrue(BytesUtil.compareBytes(key1, key2) < 0);
        assertTrue(BytesUtil.compareBytes(key2, key3) > 0);

    }

    /**
     * Test verifies that the trailing <code>nul</code> byte is not part of
     * the key when a unicode string is appended to an {@link IKeyBuilder}.
     * <p>
     * Note: The trailing <code>nul</code> byte is appended by the ICU library
     * in order to have compatibility with their C library, but it is not of
     * interest for Java processing. However, note that a <code>nul</code>
     * byte MAY be used to separate components of a complex key.
     */
    public void test_keyBuilder_unicode_String_noTrailingNul() {

        final IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(getProperties());

        keyBuilder.append("Hello World!");

        final byte[] key = keyBuilder.getKey();

        // Fail cleanly (rather than with an index error) on an empty key.
        assertTrue("Expecting a non-empty key.", key.length > 0);

        /*
         * Compare the primitive byte directly. assertNotSame() tests object
         * identity on the boxed values, which is not the intended check.
         */
        assertTrue("Not expecting a trailing nul byte.",
                key[key.length - 1] != 0);

    }

    /*
     * Note: This test has been disabled. The SparseRowStore no longer makes
     * this assumption in order to be compatible with the JDK collator.
     */
//    /**
//     * Test verifies that the <code>nul</code> byte is not part of the key when
//     * a unicode string is appended to an {@link IKeyBuilder}.
//     * <p>
//     * Note: The {@link SparseRowStore} assumes that Unicode sort keys do not
//     * contain embedded <code>nul</code>s.
//     */
//    public void test_keyBuilder_unicode_String_noEmbeddedNuls() {
//
//        final IKeyBuilder keyBuilder = KeyBuilder
//                .newUnicodeInstance(getProperties());
//
//        keyBuilder.append("Hello World!");
//
//        final byte[] key = keyBuilder.getKey();
//
//        for (int i = 0; i < key.length; i++) {
//            if (key[i] == 0)
//                fail("Embedded nuls: key=" + BytesUtil.toString(key));
//        }
//
//    }

    /**
     * Test of the ability to normalize trailing pad characters.
     * <p>
     * Strings which differ only in trailing pad characters must normalize to
     * the same text, while leading pad characters must remain significant.
     */
    public void test_keyBuilder_normalizeTrailingPadCharacters() {

        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
                .newUnicodeInstance(getProperties());

        // Pad-only strings of different lengths normalize like the empty string.
        assertEquals(//
                keyBuilder.normalizeText(""),//
                keyBuilder.normalizeText(" ")//
        );
        assertEquals(//
                keyBuilder.normalizeText(""),//
                keyBuilder.normalizeText("  ")//
        );
        assertEquals(//
                keyBuilder.normalizeText(""),//
                keyBuilder.normalizeText("      ")//
        );

        // Pad-only strings of different lengths normalize to the same text.
        assertEquals(//
                keyBuilder.normalizeText(" "),//
                keyBuilder.normalizeText("   ")//
        );

        // Trailing pad characters after real text are not significant.
        assertEquals(//
                keyBuilder.normalizeText("abc"),//
                keyBuilder.normalizeText("abc ")//
        );
        assertEquals(//
                keyBuilder.normalizeText(" abc"),//
                keyBuilder.normalizeText(" abc ")//
        );

        /*
         * Leading pad characters ARE significant. This compares by value:
         * assertNotSame() would only compare object identity, which holds for
         * any two distinct String instances and therefore verifies nothing.
         */
        assertFalse(keyBuilder.normalizeText("abc").equals(
                keyBuilder.normalizeText(" abc ")));

    }

    /**
     * Test verifies that very long strings are truncated.
     *
     * @todo verify that trailing whitespace is removed after truncation rather
     *       than before truncation.
     */
    public void test_keyBuilder_normalizeTruncatesVeryLongStrings() {

        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
                .newUnicodeInstance(getProperties());

        final String text = TestKeyBuilder.getMaximumLengthText();

        // Characters beyond the maximum length must not change the result.
        assertEquals(//
                keyBuilder.normalizeText(text),//
                keyBuilder.normalizeText(text + "abc")//
        );

    }

    /**
     * Test verifies the order among unicode sort keys, including verifying that
     * the pad byte causes a prefix such as "bro" to sort before a term which
     * extends that prefix, such as "brown".
     */
    public void test_keyBuilder_unicode_order() {

        /*
         * NOTE(review): this instance is never used below; the sort keys are
         * generated by TestKeyBuilder.asSortKey(). It is retained since its
         * construction may validate the configuration - confirm whether the
         * keys were meant to be generated by this builder instead.
         */
        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
                .newUnicodeInstance(getProperties());

        // Generic array creation is not possible, hence the raw array type.
        @SuppressWarnings("unchecked")
        final KVO<String>[] a = new KVO[] {
                new KVO<String>(TestKeyBuilder.asSortKey("bro"), null, "bro"),
                new KVO<String>(TestKeyBuilder.asSortKey("brown"), null, "brown"),
                new KVO<String>(TestKeyBuilder.asSortKey("bre"), null, "bre"),
                new KVO<String>(TestKeyBuilder.asSortKey("break"), null, "break"),
        };

        // sort by the assigned sort keys.
        Arrays.sort(a);

        /*
         * verify that "bre(ak)" is before "bro(wn)" and that "bre" is before
         * "break" and "bro" is before "brown".
         */
        assertEquals("bre", a[0].obj);
        assertEquals("break", a[1].obj);
        assertEquals("bro", a[2].obj);
        assertEquals("brown", a[3].obj);

    }

    /**
     * <p>
     * Test that lexicographic order is maintained when a variable length
     * Unicode field is followed by another field. This test works by comparing
     * the original multi-field key with the multi-field key formed from the
     * successor of the Unicode field followed by the other field:
     * </p>
     *
     * <pre>
     *
     * [text][nextValue] LT [successor(text)][nextValue]
     *
     * </pre>
     */
    public void test_keyBuilder_multiField_unicode() {

        final boolean unicode = true;

        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
                .newUnicodeInstance(getProperties());

        TestKeyBuilder.doMultiFieldTests(unicode, keyBuilder);

        /*
         * Now test some strings that contain code points outside of the 8-bit
         * range.
         */

        // Note: This is "Japanese" in kanji, followed by its hiragana reading.
        final String text = "\u65E5\u672C\u8A9E / \u306B\u307B\u3093\u3054";

        // Values for the field which follows the Unicode field in the key.
        final byte[] nextValues = new byte[] { 0, 1, -1, Byte.MIN_VALUE,
                Byte.MAX_VALUE };

        for (final byte nextValue : nextValues) {

            TestKeyBuilder.doMultiFieldTest(keyBuilder, unicode, text, nextValue);

        }

    }

}