/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package com.linkedin.cubert.io; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.testng.Assert; import org.testng.annotations.Test; import com.linkedin.cubert.block.BlockSchema; import com.linkedin.cubert.io.CompactDeserializer; import com.linkedin.cubert.io.CompactSerializer; import com.linkedin.cubert.io.CompactWritablesDeserializer; public class TestCompactSerialization { private Tuple newNullTuple(int size) { return TupleFactory.getInstance().newTuple(size); } private Tuple newTuple(Object... args) { Tuple tuple = TupleFactory.getInstance().newTuple(args.length); for (int i = 0; i < args.length; i++) { try { tuple.set(i, args[i]); } catch (ExecException e) { e.printStackTrace(); throw new RuntimeException(e); } } return tuple; } private void validate(BlockSchema schema, List<Tuple> list) { try { validate2(schema, list); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } } private void validate2(BlockSchema schema, List<Tuple> list) throws IOException { // serialize ByteArrayOutputStream bos = new ByteArrayOutputStream(); CompactSerializer<Tuple> ser = new CompactSerializer<Tuple>(schema); ser.open(bos); for (Tuple tuple : list) ser.serialize(tuple); ser.close(); // deserialize ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); CompactDeserializer<Tuple> des = new CompactDeserializer<Tuple>(schema); des.open(bis); Tuple tuple = null; for (int i = 0; i < list.size(); i++) { tuple = des.deserialize(tuple); Assert.assertEquals(tuple.toString(), list.get(i).toString()); } des.close(); // writables deserializer bis = new ByteArrayInputStream(bos.toByteArray()); CompactWritablesDeserializer<Tuple> des2 = new CompactWritablesDeserializer<Tuple>(schema); des2.open(bis); tuple = null; for (int i = 0; i < list.size(); i++) { tuple = des2.deserialize(tuple); Assert.assertEquals(tuple.toString(), list.get(i).toString()); } des2.close(); } @Test public void testIntegers() { BlockSchema schema = new BlockSchema("int col"); List<Tuple> list = new ArrayList<Tuple>(); // all bit lengths for (int i = 0; i <= 32; i++) { int num = 0; for (int j = 0; j < i; j++) num |= 1 << j; list.add(newTuple(num)); list.add(newTuple(-num)); } // special values list.add(newTuple(Integer.MAX_VALUE)); list.add(newTuple(Integer.MIN_VALUE)); // random values Random rand = new Random(1); for (int i = 0; i < 1000; i++) list.add(newTuple(rand.nextInt())); // null value list.add(newNullTuple(1)); validate(schema, list); } @Test public void testLongs() { BlockSchema schema = new BlockSchema("long col"); List<Tuple> list = new ArrayList<Tuple>(); // all bit lengths for (int i = 0; i <= 64; i++) { long num = 0; for (int j = 0; j < i; j++) num |= 1L << j; list.add(newTuple(num)); list.add(newTuple(-num)); } // special values list.add(newTuple(Long.MAX_VALUE)); list.add(newTuple(Long.MIN_VALUE)); // random values Random rand = new Random(1); for (int i = 0; i < 1000; i++) list.add(newTuple(rand.nextLong())); // null value list.add(newNullTuple(1)); validate(schema, list); } @Test public void testFloats() { BlockSchema schema = new BlockSchema("float col"); List<Tuple> list = new ArrayList<Tuple>(); // all bit lengths for (int i = 0; i <= 32; i++) { int num = 0; for (int j = 0; j < i; j++) num |= 1 << j; float f = (float) num; list.add(newTuple(f)); list.add(newTuple(-f)); } // special values list.add(newTuple(0.0f)); list.add(newTuple(-0.0f)); list.add(newTuple(Float.MIN_VALUE)); list.add(newTuple(Float.MAX_VALUE)); list.add(newTuple(Float.POSITIVE_INFINITY)); list.add(newTuple(Float.NEGATIVE_INFINITY)); // random float values Random rand = new Random(1); for (int i = 0; i < 1000; i++) list.add(newTuple(rand.nextFloat())); // random integers as floats for (int i = 0; i < 1000; i++) list.add(newTuple((float) rand.nextInt())); // null value list.add(newNullTuple(1)); validate(schema, list); } @Test public void testDoubles() { BlockSchema schema = new BlockSchema("double col"); List<Tuple> list = new ArrayList<Tuple>(); // all bit lengths for (int i = 0; i <= 64; i++) { long num = 0; for (int j = 0; j < i; j++) num |= 1L << j; double d = (double) num; list.add(newTuple(d)); list.add(newTuple(-d)); } // special values list.add(newTuple(0.0d)); list.add(newTuple(-0.0d)); list.add(newTuple(Double.MIN_VALUE)); list.add(newTuple(Double.MAX_VALUE)); list.add(newTuple(Double.POSITIVE_INFINITY)); list.add(newTuple(Double.NEGATIVE_INFINITY)); // random float values Random rand = new Random(1); for (int i = 0; i < 1000; i++) list.add(newTuple(rand.nextDouble())); // random longs as floats for (int i = 0; i < 1000; i++) list.add(newTuple((double) rand.nextLong())); // null value list.add(newNullTuple(1)); validate(schema, list); } @Test public void testBooleans() { BlockSchema schema = new BlockSchema("boolean col"); List<Tuple> list = new ArrayList<Tuple>(); list.add(newTuple(true)); list.add(newTuple(false)); list.add(newNullTuple(1)); validate(schema, list); } @Test public void testStrings() { BlockSchema schema = new BlockSchema("string col"); List<Tuple> list = new ArrayList<Tuple>(); list.add(newTuple("test")); list.add(newTuple("")); list.add(newNullTuple(1)); validate(schema, list); } @Test public void testByte() { BlockSchema schema = new BlockSchema("byte col"); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < 256; i++) list.add(newTuple((byte) i)); list.add(newNullTuple(1)); validate(schema, list); } @Test public void testMultiTypes() { BlockSchema schema = new BlockSchema("int col1, long col2, float col3, double col4, boolean col5, byte col6, string col7"); List<Tuple> list = new ArrayList<Tuple>(); Random rand = new Random(1); for (int i = 0; i < 1; i++) { list.add(newTuple(rand.nextInt(), rand.nextLong(), rand.nextFloat(), rand.nextDouble(), rand.nextBoolean(), (byte) rand.nextInt(256), "str" + rand.nextInt())); } validate(schema, list); } @Test public void testMultiTypesWithNull() { BlockSchema schema = new BlockSchema("int col1, long col2, float col3, double col4, boolean col5, byte col6, string col7"); List<Tuple> list = new ArrayList<Tuple>(); Random rand = new Random(1); for (int i = 0; i < 1000; i++) { list.add(newTuple(i % 5 == 0 ? null : rand.nextInt(), i % 6 == 0 ? null : rand.nextLong(), i % 7 == 0 ? null : rand.nextFloat(), i % 8 == 0 ? null : rand.nextDouble(), i % 9 == 0 ? null : rand.nextBoolean(), i % 4 == 0 ? null : (byte) rand.nextInt(256), i % 10 == 0 ? null : "str" + rand.nextInt())); } validate(schema, list); } }