/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.io; import junit.framework.TestCase; import java.io.IOException; import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.util.Random; import com.google.common.primitives.Bytes; /** Unit tests for LargeUTF8. */ public class TestText extends TestCase { private static final int NUM_ITERATIONS = 100; public TestText(String name) { super(name); } private static final Random RANDOM = new Random(1); private static final int RAND_LEN = -1; // generate a valid java String private static String getTestString(int len) throws Exception { StringBuilder buffer = new StringBuilder(); int length = (len==RAND_LEN) ? RANDOM.nextInt(1000) : len; while (buffer.length()<length) { int codePoint = RANDOM.nextInt(Character.MAX_CODE_POINT); char tmpStr[] = new char[2]; if (Character.isDefined(codePoint)) { //unpaired surrogate if (codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT && !Character.isHighSurrogate((char)codePoint) && !Character.isLowSurrogate((char)codePoint)) { Character.toChars(codePoint, tmpStr, 0); buffer.append(tmpStr); } } } return buffer.toString(); } public static String getTestString() throws Exception { return getTestString(RAND_LEN); } public static String getLongString() throws Exception { String str = getTestString(); int length = Short.MAX_VALUE+str.length(); StringBuilder buffer = new StringBuilder(); while(buffer.length()<length) buffer.append(str); return buffer.toString(); } public void testWritable() throws Exception { for (int i = 0; i < NUM_ITERATIONS; i++) { String str; if (i == 0) str = getLongString(); else str = getTestString(); TestWritable.testWritable(new Text(str)); } } public void testCoding() throws Exception { String before = "Bad \t encoding \t testcase"; Text text = new Text(before); String after = text.toString(); assertTrue(before.equals(after)); for (int i = 0; i < NUM_ITERATIONS; i++) { // generate a random string if (i == 0) before = getLongString(); else before = getTestString(); // test string to utf8 ByteBuffer bb = Text.encode(before); byte[] utf8Text = bb.array(); byte[] utf8Java = before.getBytes("UTF-8"); assertEquals(0, WritableComparator.compareBytes( utf8Text, 0, bb.limit(), utf8Java, 0, utf8Java.length)); // test utf8 to string after = Text.decode(utf8Java); assertTrue(before.equals(after)); } } public void testIO() throws Exception { DataOutputBuffer out = new DataOutputBuffer(); DataInputBuffer in = new DataInputBuffer(); for (int i = 0; i < NUM_ITERATIONS; i++) { // generate a random string String before; if (i == 0) before = getLongString(); else before = getTestString(); // write it out.reset(); Text.writeString(out, before); // test that it reads correctly in.reset(out.getData(), out.getLength()); String after = Text.readString(in); assertTrue(before.equals(after)); // Test compatibility with Java's other decoder int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before)); String after2 = new String(out.getData(), strLenSize, out.getLength()-strLenSize, "UTF-8"); assertTrue(before.equals(after2)); } } public void doTestLimitedIO(String str, int len) throws IOException { DataOutputBuffer out = new DataOutputBuffer(); DataInputBuffer in = new DataInputBuffer(); out.reset(); try { Text.writeString(out, str, len); fail("expected writeString to fail when told to write a string " + "that was too long! The string was '" + str + "'"); } catch (IOException e) { } Text.writeString(out, str, len + 1); // test that it reads correctly in.reset(out.getData(), out.getLength()); in.mark(len); String after; try { after = Text.readString(in, len); fail("expected readString to fail when told to read a string " + "that was too long! The string was '" + str + "'"); } catch (IOException e) { } in.reset(); after = Text.readString(in, len + 1); assertTrue(str.equals(after)); } public void testLimitedIO() throws Exception { doTestLimitedIO("abcd", 3); doTestLimitedIO("foo bar baz", 10); doTestLimitedIO("1", 0); } public void testCompare() throws Exception { DataOutputBuffer out1 = new DataOutputBuffer(); DataOutputBuffer out2 = new DataOutputBuffer(); DataOutputBuffer out3 = new DataOutputBuffer(); Text.Comparator comparator = new Text.Comparator(); for (int i=0; i<NUM_ITERATIONS; i++) { // reset output buffer out1.reset(); out2.reset(); out3.reset(); // generate two random strings String str1 = getTestString(); String str2 = getTestString(); if (i == 0) { str1 = getLongString(); str2 = getLongString(); } else { str1 = getTestString(); str2 = getTestString(); } // convert to texts Text txt1 = new Text(str1); Text txt2 = new Text(str2); Text txt3 = new Text(str1); // serialize them txt1.write(out1); txt2.write(out2); txt3.write(out3); // compare two strings by looking at their binary formats int ret1 = comparator.compare(out1.getData(), 0, out1.getLength(), out2.getData(), 0, out2.getLength()); // compare two strings int ret2 = txt1.compareTo(txt2); assertEquals(ret1, ret2); // test equal assertEquals(txt1.compareTo(txt3), 0); assertEquals(comparator.compare(out1.getData(), 0, out3.getLength(), out3.getData(), 0, out3.getLength()), 0); } } public void testFind() throws Exception { Text text = new Text("abcd\u20acbdcd\u20ac"); assertTrue(text.find("abd")==-1); assertTrue(text.find("ac")==-1); assertTrue(text.find("\u20ac")==4); assertTrue(text.find("\u20ac", 5)==11); } public void testFindAfterUpdatingContents() throws Exception { Text text = new Text("abcd"); text.set("a".getBytes()); assertEquals(text.getLength(),1); assertEquals(text.find("a"), 0); assertEquals(text.find("b"), -1); } public void testValidate() throws Exception { Text text = new Text("abcd\u20acbdcd\u20ac"); byte [] utf8 = text.getBytes(); int length = text.getLength(); Text.validateUTF8(utf8, 0, length); } public void testClear() throws Exception { // Test lengths on an empty text object Text text = new Text(); assertEquals( "Actual string on an empty text object must be an empty string", "", text.toString()); assertEquals("Underlying byte array length must be zero", 0, text.getBytes().length); assertEquals("String's length must be zero", 0, text.getLength()); // Test if clear works as intended text = new Text("abcd\u20acbdcd\u20ac"); int len = text.getLength(); text.clear(); assertEquals("String must be empty after clear()", "", text.toString()); assertTrue( "Length of the byte array must not decrease after clear()", text.getBytes().length >= len); assertEquals("Length of the string must be reset to 0 after clear()", 0, text.getLength()); } public void testTextText() throws CharacterCodingException { Text a=new Text("abc"); Text b=new Text("a"); b.set(a); assertEquals("abc", b.toString()); a.append("xdefgxxx".getBytes(), 1, 4); assertEquals("modified aliased string", "abc", b.toString()); assertEquals("appended string incorrectly", "abcdefg", a.toString()); // add an extra byte so that capacity = 14 and length = 8 a.append(new byte[]{'d'}, 0, 1); assertEquals(14, a.getBytes().length); assertEquals(8, a.copyBytes().length); } private class ConcurrentEncodeDecodeThread extends Thread { public ConcurrentEncodeDecodeThread(String name) { super(name); } @Override public void run() { String name = this.getName(); DataOutputBuffer out = new DataOutputBuffer(); DataInputBuffer in = new DataInputBuffer(); for (int i=0; i < 1000; ++i) { try { out.reset(); WritableUtils.writeString(out, name); in.reset(out.getData(), out.getLength()); String s = WritableUtils.readString(in); assertEquals(name, s); } catch (Exception ioe) { throw new RuntimeException(ioe); } } } } public void testConcurrentEncodeDecode() throws Exception{ Thread thread1 = new ConcurrentEncodeDecodeThread("apache"); Thread thread2 = new ConcurrentEncodeDecodeThread("hadoop"); thread1.start(); thread2.start(); thread2.join(); thread2.join(); } public void testAvroReflect() throws Exception { AvroTestUtil.testReflect (new Text("foo"), "{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.io.Text\"}"); } /** * */ public void testCharAt() { String line = "adsawseeeeegqewgasddga"; Text text = new Text(line); for (int i = 0; i < line.length(); i++) { assertTrue("testCharAt error1 !!!", text.charAt(i) == line.charAt(i)); } assertEquals("testCharAt error2 !!!", -1, text.charAt(-1)); assertEquals("testCharAt error3 !!!", -1, text.charAt(100)); } /** * test {@code Text} readFields/write operations */ public void testReadWriteOperations() { String line = "adsawseeeeegqewgasddga"; byte[] inputBytes = line.getBytes(); inputBytes = Bytes.concat(new byte[] {(byte)22}, inputBytes); DataInputBuffer in = new DataInputBuffer(); DataOutputBuffer out = new DataOutputBuffer(); Text text = new Text(line); try { in.reset(inputBytes, inputBytes.length); text.readFields(in); } catch(Exception ex) { fail("testReadFields error !!!"); } try { text.write(out); } catch(IOException ex) { } catch(Exception ex) { fail("testReadWriteOperations error !!!"); } } /** * test {@code Text.bytesToCodePoint(bytes) } * with {@code BufferUnderflowException} * */ public void testBytesToCodePoint() { try { ByteBuffer bytes = ByteBuffer.wrap(new byte[] {-2, 45, 23, 12, 76, 89}); Text.bytesToCodePoint(bytes); assertTrue("testBytesToCodePoint error !!!", bytes.position() == 6 ); } catch (BufferUnderflowException ex) { fail("testBytesToCodePoint unexp exception"); } catch (Exception e) { fail("testBytesToCodePoint unexp exception"); } } public void testbytesToCodePointWithInvalidUTF() { try { Text.bytesToCodePoint(ByteBuffer.wrap(new byte[] {-2})); fail("testbytesToCodePointWithInvalidUTF error unexp exception !!!"); } catch (BufferUnderflowException ex) { } catch(Exception e) { fail("testbytesToCodePointWithInvalidUTF error unexp exception !!!"); } } public void testUtf8Length() { assertEquals("testUtf8Length1 error !!!", 1, Text.utf8Length(new String(new char[]{(char)1}))); assertEquals("testUtf8Length127 error !!!", 1, Text.utf8Length(new String(new char[]{(char)127}))); assertEquals("testUtf8Length128 error !!!", 2, Text.utf8Length(new String(new char[]{(char)128}))); assertEquals("testUtf8Length193 error !!!", 2, Text.utf8Length(new String(new char[]{(char)193}))); assertEquals("testUtf8Length225 error !!!", 2, Text.utf8Length(new String(new char[]{(char)225}))); assertEquals("testUtf8Length254 error !!!", 2, Text.utf8Length(new String(new char[]{(char)254}))); } public static void main(String[] args) throws Exception { TestText test = new TestText("main"); test.testIO(); test.testCompare(); test.testCoding(); test.testWritable(); test.testFind(); test.testValidate(); } }