/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.binarysortable;

import java.io.IOException;
import java.util.ArrayList;
import java.io.EOFException;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerdeRandomRowSource;
import org.apache.hadoop.hive.serde2.VerifyFast;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.lazy.VerifyLazy;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BytesWritable;

import junit.framework.TestCase;
import org.junit.Assert;

/**
 * Cross-checks {@link BinarySortableSerializeWrite} / {@link BinarySortableDeserializeRead}
 * against SerDe instances obtained from {@code TestBinarySortableSerDe.getSerDe}.
 *
 * <p>For sorted, randomly generated rows each run verifies that:
 * <ul>
 *   <li>bytes produced by SerializeWrite compare in the expected sort direction;</li>
 *   <li>DeserializeRead and the SerDe both read those bytes back to the original values;</li>
 *   <li>the SerDe serializes each row to exactly the same bytes as SerializeWrite;</li>
 *   <li>DeserializeRead reads the SerDe-produced bytes back to the original values.</li>
 * </ul>
 */
public class TestBinarySortableFast extends TestCase {

  // Debug aids: only assigned by the commented-out lines in the truncated-row check of
  // testBinarySortableFast. Kept so those lines can be re-enabled while investigating.
  private static String debugDetailedReadPositionString;

  private static StackTraceElement[] debugStackTrace;

  /**
   * Runs one full round-trip verification over {@code rows} for a single combination of
   * sort direction, include-columns usage and "write fewer columns" mode.
   *
   * @param source the row source that produced {@code rows} (not read by this method)
   * @param rows rows to serialize; consecutive rows must already be in an order whose
   *        serialized form is non-decreasing when {@code ascending} and non-increasing otherwise
   * @param columnSortOrderIsDesc per-column descending flags given to the fast classes
   * @param columnNullMarker per-column null marker bytes given to the fast classes
   * @param columnNotNullMarker per-column not-null marker bytes given to the fast classes
   * @param serde SerDe covering all columns
   * @param rowOI object inspector for a full row
   * @param serde_fewer SerDe covering only the written columns; only used when
   *        {@code doWriteFewerColumns} is true
   * @param writeRowOI object inspector whose field count is the number of columns to write;
   *        only consulted when {@code doWriteFewerColumns} is true
   * @param ascending expected byte-wise ordering of consecutive serialized rows
   * @param typeInfos type of every column
   * @param useIncludeColumns when true, a random subset of columns is skipped instead of read
   * @param doWriteFewerColumns when true, only a prefix of the columns is written
   * @param r random source used to choose the included columns
   * @throws Throwable on any verification failure or serialization error
   */
  private void testBinarySortableFast(
      SerdeRandomRowSource source, Object[][] rows,
      boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker,
      AbstractSerDe serde, StructObjectInspector rowOI,
      AbstractSerDe serde_fewer, StructObjectInspector writeRowOI,
      boolean ascending, TypeInfo[] typeInfos,
      boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {

    int rowCount = rows.length;
    int columnCount = typeInfos.length;

    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
      columnsToInclude = new boolean[columnCount];
      for (int i = 0; i < columnCount; i++) {
        columnsToInclude[i] = r.nextBoolean();
      }
    }

    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
      writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    }

    BinarySortableSerializeWrite binarySortableSerializeWrite =
        new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

    // Try to serialize

    // One Writable per row.
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];

    // Cumulative output length after each written field, per row; used only for diagnostics.
    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      Output output = new Output();
      binarySortableSerializeWrite.set(output);

      int[] perFieldWriteLengths = new int[columnCount];
      for (int index = 0; index < writeColumnCount; index++) {
        VerifyFast.serializeWrite(binarySortableSerializeWrite, typeInfos[index], row[index]);
        perFieldWriteLengths[index] = output.getLength();
      }
      perFieldWriteLengthsArray[i] = perFieldWriteLengths;

      BytesWritable bytesWritable = new BytesWritable();
      bytesWritable.set(output.getData(), 0, output.getLength());
      serializeWriteBytes[i] = bytesWritable;

      if (i > 0) {
        BytesWritable previousBytesWritable = serializeWriteBytes[i - 1];
        int compareResult = previousBytesWritable.compareTo(bytesWritable);
        if ((compareResult < 0 && !ascending)
            || (compareResult > 0 && ascending)) {
          System.out.println("Test failed in "
              + (ascending ? "ascending" : "descending") + " order with "
              + (i - 1) + " and " + i);
          System.out.println("serialized data [" + (i - 1) + "] = "
              + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
          System.out.println("serialized data [" + i + "] = "
              + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
          fail("Sort order of serialized " + (i - 1) + " and " + i
              + " are reversed!");
        }
      }
    }

    // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      BinarySortableDeserializeRead binarySortableDeserializeRead =
          newDeserializeRead(typeInfos, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

      BytesWritable bytesWritable = serializeWriteBytes[i];
      binarySortableDeserializeRead.set(
          bytesWritable.getBytes(), 0, bytesWritable.getLength());

      for (int index = 0; index < columnCount; index++) {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead.skipNextField();
        } else if (index >= writeColumnCount) {
          // Should come back a null.
          VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, typeInfos[index], null);
        } else {
          verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
        }
      }
      if (writeColumnCount == columnCount) {
        TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
      }

      /*
       * Clip off one byte and expect to get an EOFException on the last written field.
       */
      BinarySortableDeserializeRead binarySortableDeserializeRead2 =
          newDeserializeRead(typeInfos, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

      binarySortableDeserializeRead2.set(
          bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1);  // One fewer byte.

      for (int index = 0; index < writeColumnCount; index++) {
        if (index == writeColumnCount - 1) {
          // The last written field is always read (never skipped) so the truncation is hit.
          boolean threw = false;
          try {
            verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
          } catch (EOFException e) {
            // debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
            // debugStackTrace = e.getStackTrace();
            threw = true;
          }

          // A missing EOFException is only treated as a failure for a non-null last field.
          if (!threw && row[index] != null) {
            Assert.fail("Expected EOFException reading field " + index
                + " of row " + i + " with the final byte removed");
          }
        } else {
          if (useIncludeColumns && !columnsToInclude[index]) {
            binarySortableDeserializeRead2.skipNextField();
          } else {
            verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
          }
        }
      }
    }

    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    // Note that regular SerDe doesn't tolerate fewer columns, hence the dedicated instance.
    AbstractSerDe activeSerde = doWriteFewerColumns ? serde_fewer : serde;
    for (int i = 0; i < rowCount; i++) {
      BytesWritable bytesWritable = serializeWriteBytes[i];

      // deserialize() is declared to return Object; the cast target is what this test reads.
      @SuppressWarnings("unchecked")
      List<Object> deserializedRow = (List<Object>) activeSerde.deserialize(bytesWritable);

      Object[] row = rows[i];
      for (int index = 0; index < writeColumnCount; index++) {
        Object expected = row[index];
        Object object = deserializedRow.get(index);
        if (expected == null || object == null) {
          if (expected != null || object != null) {
            fail("SerDe deserialized NULL column mismatch");
          }
        } else {
          if (!object.equals(expected)) {
            fail("SerDe deserialized value does not match (expected "
                + expected.getClass().getName() + " " + expected.toString()
                + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
          }
        }
      }
    }

    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];

    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];

      // Since SerDe reuses memory, we will need to make a copy.
      BytesWritable serialized = (BytesWritable) activeSerde.serialize(row, rowOI);
      BytesWritable bytesWritable = new BytesWritable();
      bytesWritable.set(serialized);

      byte[] serDeOutput =
          Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
      byte[] serializeWriteExpected =
          Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());

      failOnSerializationMismatch(
          serDeOutput, serializeWriteExpected, perFieldWriteLengthsArray[i], typeInfos, row);

      serdeBytes[i] = bytesWritable;
    }

    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
      Object[] row = rows[i];
      BinarySortableDeserializeRead binarySortableDeserializeRead =
          newDeserializeRead(typeInfos, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);

      BytesWritable bytesWritable = serdeBytes[i];
      binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());

      for (int index = 0; index < columnCount; index++) {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead.skipNextField();
        } else if (index >= writeColumnCount) {
          // Should come back a null.
          verifyRead(binarySortableDeserializeRead, typeInfos[index], null);
        } else {
          verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
        }
      }
      if (writeColumnCount == columnCount) {
        TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
      }
    }
  }

  /**
   * Creates a {@link BinarySortableDeserializeRead} for the given column layout with
   * {@code useExternalBuffer} set to false.
   */
  private static BinarySortableDeserializeRead newDeserializeRead(
      TypeInfo[] typeInfos, boolean[] columnSortOrderIsDesc,
      byte[] columnNullMarker, byte[] columnNotNullMarker) {
    return new BinarySortableDeserializeRead(
        typeInfos,
        /* useExternalBuffer */ false,
        columnSortOrderIsDesc,
        columnNullMarker,
        columnNotNullMarker);
  }

  /**
   * Fails the test with a detailed message when the SerDe output differs from the
   * SerializeWrite output; returns normally when the two arrays are equal.
   *
   * @param serDeOutput bytes produced by the SerDe
   * @param serializeWriteExpected bytes produced by SerializeWrite for the same row
   * @param perFieldWriteLengths cumulative SerializeWrite output length after each field
   * @param typeInfos column types, reported in the failure message
   * @param row the source row, reported in the failure message
   */
  private static void failOnSerializationMismatch(
      byte[] serDeOutput, byte[] serializeWriteExpected,
      int[] perFieldWriteLengths, TypeInfo[] typeInfos, Object[] row) {

    if (Arrays.equals(serDeOutput, serializeWriteExpected)) {
      return;
    }

    if (serDeOutput.length != serializeWriteExpected.length) {
      // Report the first differing byte within the common prefix (-1 if the prefix matches).
      int mismatchPos = -1;
      int commonLength = Math.min(serDeOutput.length, serializeWriteExpected.length);
      for (int b = 0; b < commonLength; b++) {
        if (serDeOutput[b] != serializeWriteExpected[b]) {
          mismatchPos = b;
          break;
        }
      }
      fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length
          + ", serializeWriteExpected.length " + serializeWriteExpected.length
          + " mismatchPos " + mismatchPos
          + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengths));
    }

    List<Integer> differentPositions = new ArrayList<Integer>();
    for (int b = 0; b < serDeOutput.length; b++) {
      if (serDeOutput[b] != serializeWriteExpected[b]) {
        differentPositions.add(b);
      }
    }
    if (differentPositions.size() > 0) {
      // Split both byte arrays at the recorded field boundaries so the message shows
      // the two encodings field by field.
      List<String> serializeWriteExpectedFields = new ArrayList<String>();
      List<String> serDeFields = new ArrayList<String>();
      int f = 0;
      int lastBegin = 0;
      for (int b = 0; b < serDeOutput.length; b++) {
        int writeLength = perFieldWriteLengths[f];
        if (b + 1 == writeLength) {
          serializeWriteExpectedFields.add(
              displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
          serDeFields.add(
              displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
          f++;
          lastBegin = b + 1;
        }
      }
      fail("SerializeWrite and SerDe serialization does not match at positions "
          + differentPositions.toString()
          + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString()
          + "\nSerDe: " + serDeFields.toString()
          + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengths)
          + "\nprimitiveTypeInfos " + Arrays.toString(typeInfos)
          + "\nrow " + Arrays.toString(row));
    }
  }

  /**
   * Reads the next field from {@code binarySortableDeserializeRead} and fails the test if it
   * does not match {@code expectedObject}.
   *
   * <p>Primitive fields are delegated to {@link VerifyFast#verifyDeserializeRead}. Other
   * categories are read with {@link VerifyFast#deserializeReadComplexType} and compared with
   * {@link VerifyLazy#lazyCompare}.
   *
   * @param binarySortableDeserializeRead reader positioned before the field to verify
   * @param typeInfo type of the field
   * @param expectedObject expected value; may be null
   * @throws IOException if reading the field fails
   */
  private void verifyRead(BinarySortableDeserializeRead binarySortableDeserializeRead,
      TypeInfo typeInfo, Object expectedObject) throws IOException {
    if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
      VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, typeInfo, expectedObject);
    } else {
      Object complexFieldObj =
          VerifyFast.deserializeReadComplexType(binarySortableDeserializeRead, typeInfo);
      if (expectedObject == null) {
        if (complexFieldObj != null) {
          TestCase.fail("Field reports not null but object is null (class "
              + complexFieldObj.getClass().getName() + ", " + complexFieldObj.toString() + ")");
        }
      } else {
        if (complexFieldObj == null) {
          // It's hard to distinguish a union with null from a null union.
          if (expectedObject instanceof UnionObject) {
            UnionObject expectedUnion = (UnionObject) expectedObject;
            if (expectedUnion.getObject() == null) {
              return;
            }
          }
          TestCase.fail("Field reports null but object is not null (class "
              + expectedObject.getClass().getName() + ", " + expectedObject.toString() + ")");
        }
      }
      if (!VerifyLazy.lazyCompare(typeInfo, complexFieldObj, expectedObject)) {
        TestCase.fail("Comparision failed typeInfo " + typeInfo.toString());
      }
    }
  }

  /**
   * Runs the verification for one sort direction: without and with include-columns, and,
   * when {@code doWriteFewerColumns} is set, again for both in "write fewer columns" mode.
   *
   * <p>Both sort directions go through this helper so each always receives the SerDe pair
   * that belongs to it.
   */
  private void runSortDirection(
      SerdeRandomRowSource source, Object[][] rows,
      boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker,
      AbstractSerDe serde, StructObjectInspector rowOI,
      AbstractSerDe serdeFewer, StructObjectInspector writeRowOI,
      boolean ascending, TypeInfo[] typeInfos,
      boolean doWriteFewerColumns, Random r) throws Throwable {

    testBinarySortableFast(source, rows,
        columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
        serde, rowOI, serdeFewer, writeRowOI,
        ascending, typeInfos,
        /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);

    testBinarySortableFast(source, rows,
        columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
        serde, rowOI, serdeFewer, writeRowOI,
        ascending, typeInfos,
        /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);

    if (doWriteFewerColumns) {
      testBinarySortableFast(source, rows,
          columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
          serde, rowOI, serdeFewer, writeRowOI,
          ascending, typeInfos,
          /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);

      testBinarySortableFast(source, rows,
          columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
          serde, rowOI, serdeFewer, writeRowOI,
          ascending, typeInfos,
          /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
    }
  }

  /**
   * Generates 1000 random rows, sorts them, builds ascending and descending SerDe instances
   * and runs every verification variant in both sort directions.
   *
   * @param caseNum sequence number of this case (not read by this method)
   * @param doNonRandomFill when true, rows are additionally passed to
   *        {@code MyTestClass.nonRandomRowFill} before sorting
   * @param r random source shared by row generation and the variant choices
   * @param supportedTypes which type families the row source may generate
   * @param depth depth argument passed to the row source
   * @throws Throwable on any verification failure or serialization error
   */
  private void testBinarySortableFastCase(
      int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth)
      throws Throwable {

    SerdeRandomRowSource source = new SerdeRandomRowSource();

    source.init(r, supportedTypes, depth);

    int rowCount = 1000;
    Object[][] rows = source.randomRows(rowCount);

    if (doNonRandomFill) {
      MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }

    // We need to operate on sorted data to fully test BinarySortable.
    source.sort(rows);

    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();

    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;

    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
      writeColumnCount = 1 + r.nextInt(columnCount);
      if (writeColumnCount == columnCount) {
        // Drawing the full column count means nothing is dropped; fall back to the normal mode.
        doWriteFewerColumns = false;
      } else {
        writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
      }
    }

    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    String order;
    order = StringUtils.leftPad("", columnCount, '+');
    String nullOrder;
    nullOrder = StringUtils.leftPad("", columnCount, 'a');
    AbstractSerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);

    AbstractSerDe serde_ascending_fewer = null;
    if (doWriteFewerColumns) {
      String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
      String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);

      serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
    }

    order = StringUtils.leftPad("", columnCount, '-');
    nullOrder = StringUtils.leftPad("", columnCount, 'z');
    AbstractSerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);

    AbstractSerDe serde_descending_fewer = null;
    if (doWriteFewerColumns) {
      String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
      String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);

      serde_descending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
    }

    boolean[] columnSortOrderIsDesc = new boolean[columnCount];
    Arrays.fill(columnSortOrderIsDesc, false);
    byte[] columnNullMarker = new byte[columnCount];
    Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
    byte[] columnNotNullMarker = new byte[columnCount];
    Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);

    /*
     * Ascending.
     */
    runSortDirection(source, rows,
        columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
        serde_ascending, rowStructObjectInspector,
        serde_ascending_fewer, writeRowStructObjectInspector,
        /* ascending */ true, typeInfos,
        doWriteFewerColumns, r);

    /*
     * Descending.
     */
    Arrays.fill(columnSortOrderIsDesc, true);

    runSortDirection(source, rows,
        columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
        serde_descending, rowStructObjectInspector,
        serde_descending_fewer, writeRowStructObjectInspector,
        /* ascending */ false, typeInfos,
        doWriteFewerColumns, r);
  }

  /**
   * Runs ten cases with a fixed random seed (35790), alternating non-random fill on
   * even-numbered cases. Any failure has its stack trace printed before being rethrown.
   *
   * @param supportedTypes which type families the row source may generate
   * @param depth depth argument passed to the row source
   * @throws Throwable the first failure encountered
   */
  public void testBinarySortableFast(SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {

    try {
      Random r = new Random(35790);

      for (int caseNum = 0; caseNum < 10; caseNum++) {
        testBinarySortableFastCase(caseNum, (caseNum % 2 == 0), r, supportedTypes, depth);
      }

    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }

  /** Runs the round-trip checks with {@code SupportedTypes.PRIMITIVE} at depth 0. */
  public void testBinarySortableFastPrimitive() throws Throwable {
    testBinarySortableFast(SerdeRandomRowSource.SupportedTypes.PRIMITIVE, 0);
  }

  /** Runs the round-trip checks with {@code SupportedTypes.ALL_EXCEPT_MAP} at depth 1. */
  public void testBinarySortableFastComplexDepthOne() throws Throwable {
    testBinarySortableFast(SerdeRandomRowSource.SupportedTypes.ALL_EXCEPT_MAP, 1);
  }

  /** Runs the round-trip checks with {@code SupportedTypes.ALL_EXCEPT_MAP} at depth 4. */
  public void testBinarySortableFastComplexDepthFour() throws Throwable {
    testBinarySortableFast(SerdeRandomRowSource.SupportedTypes.ALL_EXCEPT_MAP, 4);
  }

  /**
   * Formats {@code length} bytes starting at {@code start} as a sequence of backslash-prefixed,
   * zero-padded three-digit unsigned decimal values (for example {@code \001\255}).
   *
   * @param bytes source array
   * @param start index of the first byte to format
   * @param length number of bytes to format
   * @return the formatted string; empty when {@code length} is 0
   */
  private static String displayBytes(byte[] bytes, int start, int length) {
    StringBuilder sb = new StringBuilder();
    for (int i = start; i < start + length; i++) {
      sb.append(String.format("\\%03d", (int) (bytes[i] & 0xff)));
    }
    return sb.toString();
  }
}