/*
* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.memory;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.ColumnType;
import com.linkedin.cubert.block.DataType;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
import com.linkedin.cubert.utils.DataGenerator;
import java.util.Iterator;
/**
 * Unit tests for the segmented array list family: IntArrayList, LongArrayList,
 * DoubleArrayList, ObjectArrayList and BagArrayList, plus a round trip of nested
 * (bag-of-tuple) data through ColumnarTupleStore.
 *
 * All assertions follow the TestNG argument order {@code (actual, expected)} so that
 * failure messages label the two values correctly.
 *
 * Created by spyne on 1/8/15.
 *
 */
public class TestSegmentedArrayLists
{
    /**
     * Adds 1000 random ints, then checks the reported size, every stored value, and
     * the ordering reported by compareIndices for each adjacent pair.
     */
    @Test
    public void testIntArrayListAddAndGet() throws Exception
    {
        IntArrayList list = new IntArrayList(101);
        DataGenerator dgen = new DataGenerator();

        final int size = 1000;
        final int[] ints = dgen.randomInts(size);

        for (int i = 0; i < size; ++i)
        {
            list.addInt(ints[i]);
        }

        Assert.assertEquals(list.size(), size);

        for (int i = 0; i < size; ++i)
        {
            Assert.assertEquals(list.get(i), ints[i]);
        }

        for (int i = 0; i < size - 1; ++i)
        {
            final Integer left = ints[i], right = ints[i + 1];
            assertSameOrdering(list.compareIndices(i, i + 1), left.compareTo(right), i);
        }
    }

    /**
     * Rounds {@code number} up to the nearest multiple of {@code multipleOf}.
     *
     * @param number     the value to round up
     * @param multipleOf the granularity; must be non-zero
     * @return {@code number} itself when it is already a multiple, otherwise the next
     *         multiple above it
     */
    private static int upperBound(int number, int multipleOf)
    {
        if (number % multipleOf == 0)
        {
            return number;
        }
        return ((number / multipleOf) + 1) * multipleOf;
    }

    /**
     * Asserts that two comparison results agree in sign. Only the sign of a comparison
     * result is meaningful under the {@link Comparable} contract, so the magnitudes are
     * deliberately ignored.
     *
     * @param actual   the value returned by the list under test
     * @param expected the value returned by the reference {@code compareTo}
     * @param index    the first of the two adjacent indices compared, for the message
     */
    private static void assertSameOrdering(int actual, int expected, int index)
    {
        Assert.assertEquals(Integer.signum(actual),
                            Integer.signum(expected),
                            "compareIndices(" + index + ", " + (index + 1) + ")");
    }

    /**
     * Exercises ensureCapacity, updateInt and reset on an IntArrayList with a custom
     * default value, asserting that capacity is always the requested size rounded up to
     * the batch size and that untouched slots read back as the default value.
     */
    @Test
    public void testIntArrayGrowability() throws Exception
    {
        final int BATCH_SIZE = 10;
        IntArrayList list = new IntArrayList(BATCH_SIZE);

        final int MINUS_FOUR = -4;
        list.setDefaultValue(MINUS_FOUR);

        // ensure that it can hold 25 elements
        final int INITIAL_SIZE = 25;
        list.ensureCapacity(INITIAL_SIZE);
        Assert.assertEquals(list.capacity(), upperBound(INITIAL_SIZE, BATCH_SIZE));

        // test that all 25 elements are set to default value
        for (int i = 0; i < INITIAL_SIZE; i++)
        {
            Assert.assertEquals(list.getInt(i), MINUS_FOUR);
        }

        // update values for the first few elements
        final int NEW_VALUE = 3;
        final int UPDATED_COUNT = 10;
        for (int i = 0; i < UPDATED_COUNT; i++)
        {
            list.updateInt(i, NEW_VALUE);
        }

        // test: updated prefix holds the new value, the rest still holds the default
        for (int i = 0; i < INITIAL_SIZE; i++)
        {
            Assert.assertEquals(list.getInt(i), i < UPDATED_COUNT ? NEW_VALUE : MINUS_FOUR);
        }

        // resize
        final int INCREASED_SIZE = 39;
        list.ensureCapacity(INCREASED_SIZE);
        Assert.assertEquals(list.capacity(), upperBound(INCREASED_SIZE, BATCH_SIZE));

        // test values are not affected by growing
        for (int i = 0; i < INCREASED_SIZE; i++)
        {
            Assert.assertEquals(list.getInt(i), i < UPDATED_COUNT ? NEW_VALUE : MINUS_FOUR);
        }

        // reset
        final int RESET_SIZE = 12;
        list.reset(RESET_SIZE);
        Assert.assertEquals(list.capacity(), upperBound(RESET_SIZE, BATCH_SIZE));

        // test values are reset as well
        for (int i = 0; i < RESET_SIZE; i++)
        {
            Assert.assertEquals(list.getInt(i), MINUS_FOUR);
        }
    }

    /**
     * Adds 1000 random longs, then checks the reported size, every stored value, and
     * the ordering reported by compareIndices for each adjacent pair.
     */
    @Test
    public void testLongArrayListAddAndGet() throws Exception
    {
        LongArrayList list = new LongArrayList(101);
        DataGenerator dgen = new DataGenerator();

        final int size = 1000;
        final long[] longs = dgen.randomLongs(size);

        for (int i = 0; i < size; ++i)
        {
            list.addLong(longs[i]);
        }

        Assert.assertEquals(list.size(), size);

        for (int i = 0; i < size; ++i)
        {
            Assert.assertEquals(list.get(i), longs[i]);
        }

        for (int i = 0; i < size - 1; ++i)
        {
            final Long left = longs[i], right = longs[i + 1];
            assertSameOrdering(list.compareIndices(i, i + 1), left.compareTo(right), i);
        }
    }

    /**
     * Adds 1000 random doubles, then checks the reported size, every stored value, and
     * the ordering reported by compareIndices for each adjacent pair.
     */
    @Test
    public void testDoubleArrayListAddAndGet() throws Exception
    {
        DoubleArrayList list = new DoubleArrayList(101);
        DataGenerator dgen = new DataGenerator();

        final int size = 1000;
        final double[] doubles = dgen.randomDoubles(size);

        for (int i = 0; i < size; ++i)
        {
            list.add(doubles[i]);
        }

        Assert.assertEquals(list.size(), size);

        for (int i = 0; i < size; ++i)
        {
            Assert.assertEquals(list.get(i), doubles[i]);
        }

        for (int i = 0; i < size - 1; ++i)
        {
            final Double left = doubles[i], right = doubles[i + 1];
            assertSameOrdering(list.compareIndices(i, i + 1), left.compareTo(right), i);
        }
    }

    /**
     * Adds 1000 random strings to an ObjectArrayList, then checks the reported size,
     * every stored value, and the ordering reported by compareIndices for each
     * adjacent pair.
     */
    @Test
    public void testSegmentedArrayListAddAndGet() throws Exception
    {
        ObjectArrayList list = new ObjectArrayList(101);
        DataGenerator dgen = new DataGenerator();

        final int size = 1000;
        final String[] strings = dgen.randomStrings(size);

        for (int i = 0; i < size; ++i)
        {
            list.add(strings[i]);
        }

        Assert.assertEquals(list.size(), size);

        for (int i = 0; i < size; ++i)
        {
            Assert.assertEquals(list.get(i), strings[i]);
        }

        for (int i = 0; i < size - 1; ++i)
        {
            final String left = strings[i], right = strings[i + 1];
            // String.compareTo returns an arbitrary magnitude, so only the sign is checked
            assertSameOrdering(list.compareIndices(i, i + 1), left.compareTo(right), i);
        }
    }

    /**
     * Stores 10000 bags (each holding one to five (INT, DOUBLE, STRING) tuples) in a
     * BagArrayList and checks that every bag reads back with the same tuples in the
     * same order.
     */
    @Test
    public void testBagArrayList() throws Exception
    {
        SegmentedArrayList array = new BagArrayList(new BlockSchema("INT a, DOUBLE b, STRING c"), false);

        final int N = 10000;
        DataBag[] bags = new DataBag[N];
        int counter = 0;

        for (int i = 0; i < N; i++)
        {
            // bag sizes cycle through 1..5
            Tuple[] tuplesInBag = new Tuple[(i % 5) + 1];
            for (int j = 0; j < tuplesInBag.length; j++)
            {
                tuplesInBag[j] = createTuple(counter, counter * 1.0, Integer.toString(counter));
                counter++;
            }
            bags[i] = createBag(tuplesInBag);
        }

        for (DataBag bag : bags)
        {
            array.add(bag);
        }

        Assert.assertEquals(array.size, N);

        for (int i = 0; i < bags.length; i++)
        {
            assertBagEqual((DataBag) array.get(i), bags[i]);
        }
    }

    /**
     * Round-trips 10000 tuples of the form (member_id INT, bag of (name, term, value)
     * tuples) through a ColumnarTupleStore. Every third inner tuple carries a null
     * {@code value} field, so null handling inside nested bags is covered as well.
     */
    @Test
    public void testNestedSchema() throws Exception
    {
        ColumnType tupleFieldType =
                new ColumnType("element", DataType.TUPLE, new BlockSchema("STRING name, STRING term, FLOAT value"));
        BlockSchema tupleSchema = new BlockSchema(new ColumnType[] { tupleFieldType });
        ColumnType bagType = new ColumnType("bag", DataType.BAG, tupleSchema);

        BlockSchema schema = new BlockSchema(new ColumnType[]
                {
                        new ColumnType("member_id", DataType.INT),
                        bagType
                });

        final int N = 10000;
        int counter = 1;
        Tuple[] data = new Tuple[N];

        for (int i = 0; i < N; i++)
        {
            // bag sizes cycle through 1..5
            Tuple[] tuplesInBag = new Tuple[(i % 5) + 1];
            for (int j = 0; j < tuplesInBag.length; j++)
            {
                tuplesInBag[j] = createTuple("name " + counter,
                                             "term " + counter,
                                             (counter % 3 == 0) ? null : counter * 1.0f);
                counter++;
            }
            DataBag bag = createBag(tuplesInBag);
            data[i] = createTuple(i, bag);
        }

        ColumnarTupleStore store = new ColumnarTupleStore(schema, true);
        for (Tuple t : data)
        {
            store.addToStore(t);
        }

        Assert.assertEquals(store.getNumTuples(), N);

        for (int i = 0; i < N; i++)
        {
            Tuple actual = store.getTuple(i, null);
            Tuple expected = data[i];

            Assert.assertEquals(actual.get(0), expected.get(0));
            assertBagEqual((DataBag) actual.get(1), (DataBag) expected.get(1));
        }
    }

    /**
     * Builds a Pig tuple whose fields are the given values, in order.
     *
     * @param args the field values; individual values may be null
     * @return a new tuple with {@code args.length} fields
     * @throws ExecException if setting a field on the tuple fails
     */
    private Tuple createTuple(Object... args) throws ExecException
    {
        Tuple tuple = TupleFactory.getInstance().newTuple(args.length);
        for (int i = 0; i < args.length; i++)
        {
            tuple.set(i, args[i]);
        }
        return tuple;
    }

    /**
     * Builds a default Pig bag containing the given tuples, added in argument order.
     *
     * @param tuples the tuples to place in the bag
     * @return a new bag holding the tuples
     */
    private DataBag createBag(Tuple... tuples)
    {
        DataBag bag = BagFactory.getInstance().newDefaultBag();
        for (Tuple tuple : tuples)
        {
            bag.add(tuple);
        }
        return bag;
    }

    /**
     * Asserts that two bags yield equal tuples in the same iteration order and have the
     * same number of tuples.
     *
     * @param actual   the bag produced by the code under test
     * @param expected the reference bag
     */
    private void assertBagEqual(DataBag actual, DataBag expected)
    {
        Iterator<Tuple> actualIt = actual.iterator();
        Iterator<Tuple> expectedIt = expected.iterator();

        while (actualIt.hasNext())
        {
            Assert.assertTrue(expectedIt.hasNext(), "actual bag has more tuples than expected");

            Tuple actualTuple = actualIt.next();
            Tuple expectedTuple = expectedIt.next();
            Assert.assertEquals(actualTuple, expectedTuple, actualTuple + " != " + expectedTuple);
        }

        // the expected bag must be exhausted too, otherwise the actual bag was too short
        Assert.assertFalse(expectedIt.hasNext(), "actual bag has fewer tuples than expected");
    }
}