/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io;

import junit.framework.TestCase;

import java.util.Arrays;
import java.util.Random;

import org.apache.commons.lang.RandomStringUtils;

/** Unit tests for UTF8. */
@SuppressWarnings("deprecation")
public class TestUTF8 extends TestCase {

  /** Number of random strings each test method exercises. */
  private static final int ITERATIONS = 10000;

  /**
   * String size used by the tests: the exclusive upper bound on the length of
   * strings from {@link #getTestString()}, and the exact length of the strings
   * generated in {@link #testOpt(boolean)}.
   */
  private static final int STRING_LENGTH = 1010;

  /** Shared, unseeded random source; test inputs differ from run to run. */
  private static final Random RANDOM = new Random();

  public TestUTF8(String name) {
    super(name);
  }

  /**
   * Builds a random string of random length.
   *
   * <p>The length is drawn from {@code [0, STRING_LENGTH)} and every character
   * from {@code [0, Character.MAX_VALUE)}, so the result may be empty and may
   * contain NUL characters and unpaired surrogate code units.
   *
   * @return the generated string, never {@code null}
   * @throws Exception declared for the benefit of existing callers; nothing in
   *         this method throws a checked exception
   */
  public static String getTestString() throws Exception {
    int length = RANDOM.nextInt(STRING_LENGTH);
    // Local, single-threaded accumulator: StringBuilder avoids the pointless
    // synchronization of StringBuffer.
    StringBuilder buffer = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
      buffer.append((char) (RANDOM.nextInt(Character.MAX_VALUE)));
    }
    return buffer.toString();
  }

  /** Runs the generic Writable check from TestWritable over random UTF8 values. */
  public void testWritable() throws Exception {
    for (int i = 0; i < ITERATIONS; i++) {
      TestWritable.testWritable(new UTF8(getTestString()));
    }
  }

  /**
   * Checks that the bytes from {@code UTF8.getBytes(String)} decode back to
   * the original string with Java's "UTF-8" decoder.
   */
  public void testGetBytes() throws Exception {
    for (int i = 0; i < ITERATIONS; i++) {
      // generate a random string
      String before = getTestString();

      // check its utf8
      assertEquals(before, new String(UTF8.getBytes(before), "UTF-8"));
    }
  }

  /**
   * Writes random strings with {@code UTF8.writeString} and checks that they
   * are read back unchanged by {@code UTF8.readString}, by
   * {@code DataInput.readUTF}, and by Java's "UTF-8" decoder.
   */
  public void testIO() throws Exception {
    DataOutputBuffer out = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();

    for (int i = 0; i < ITERATIONS; i++) {
      // generate a random string
      String before = getTestString();

      // write it
      out.reset();
      UTF8.writeString(out, before);

      // test that it reads correctly
      in.reset(out.getData(), out.getLength());
      String after = UTF8.readString(in);
      assertEquals(before, after);

      // test that it reads correctly with DataInput
      in.reset(out.getData(), out.getLength());
      String after2 = in.readUTF();
      assertEquals(before, after2);

      // test that it is compatible with Java's other decoder;
      // the first 2 bytes are skipped (the length prefix readUTF consumes)
      String after3 = new String(out.getData(), 2, out.getLength() - 2, "UTF-8");
      assertEquals(before, after3);
    }
  }

  /** Runs the optimized-write checks for ASCII-only and then unicode strings. */
  public void testOpt() throws Exception {
    testOpt(false);
    testOpt(true);
  }

  /**
   * Test optimized writes: {@code writeStringOpt} and {@code set(String, true)}
   * must agree with their regular counterparts.
   *
   * @param unicode true for unicode, false for ascii
   */
  private void testOpt(boolean unicode) throws Exception {
    DataOutputBuffer outOpt = new DataOutputBuffer();
    DataOutputBuffer outReg = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();

    for (int i = 0; i < ITERATIONS; i++) {
      // generate a random string
      String before;
      if (unicode) {
        before = RandomStringUtils.random(STRING_LENGTH);
      } else {
        before = RandomStringUtils.randomAscii(STRING_LENGTH);
      }

      // write it
      outOpt.reset();
      outReg.reset();
      UTF8.writeStringOpt(outOpt, before);
      UTF8.writeString(outReg, before);

      // test that it reads correctly (opt)
      in.reset(outOpt.getData(), outOpt.getLength());
      String afterOpt = UTF8.readString(in);
      assertEquals(before, afterOpt);

      // test that it reads correctly (reg)
      in.reset(outReg.getData(), outReg.getLength());
      String afterReg = UTF8.readString(in);
      assertEquals(before, afterReg);

      // test setters
      UTF8 optUTF8 = new UTF8();
      optUTF8.set(before, true);
      UTF8 regUTF8 = new UTF8();
      regUTF8.set(before, false);
      assertEquals(optUTF8.toString(), regUTF8.toString());
      // JUnit 3 has no assertArrayEquals, so compare the arrays explicitly.
      assertTrue(Arrays.equals(UTF8.getBytes(before), optUTF8.getBytes()));
      assertTrue(Arrays.equals(UTF8.getBytes(before), regUTF8.getBytes()));
    }
  }

  /**
   * Checks that a string holding a single NUL character is written so that
   * the bytes after the first two decode back to it with Java's "UTF-8"
   * decoder.
   */
  public void testNullEncoding() throws Exception {
    String s = new String(new char[] { 0 });

    DataOutputBuffer dob = new DataOutputBuffer();
    new UTF8(s).write(dob);

    // NOTE(review): offset 2 presumably skips a 2-byte length prefix, as in
    // testIO -- confirm against UTF8.write.
    assertEquals(s, new String(dob.getData(), 2, dob.getLength() - 2, "UTF-8"));
  }
}