/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector;
import java.util.Random;
import junit.framework.Assert;
import org.junit.Test;
/**
* Test creation and basic manipulation of VectorizedRowBatch.
*/
public class TestVectorizedRowBatch {
// test fields
static final String[] COLORS = {"red", "yellow", "green", "blue", "violet", "orange"};
private static byte[][] colorsBytes;
private VectorizedRowBatch makeBatch() {
VectorizedRowBatch batch = new VectorizedRowBatch(3);
LongColumnVector lv = new LongColumnVector();
DoubleColumnVector dv = new DoubleColumnVector();
BytesColumnVector bv = new BytesColumnVector();
setSampleStringCol(bv);
batch.cols[0] = lv;
batch.cols[1] = dv;
batch.cols[2] = bv;
addRandomNulls(batch);
return batch;
}
@Test
/**
* Make sure you can create a batch and that all columns are the
* default size.
*/
public void testVectorizedRowBatchCreate() {
VectorizedRowBatch batch = makeBatch();
Assert.assertEquals(3, batch.numCols);
Assert.assertEquals(VectorizedRowBatch.DEFAULT_SIZE, batch.size);
Assert.assertEquals(((LongColumnVector) batch.cols[0]).vector.length,
VectorizedRowBatch.DEFAULT_SIZE);
Assert.assertEquals(((DoubleColumnVector) batch.cols[1]).vector.length,
VectorizedRowBatch.DEFAULT_SIZE);
Assert.assertEquals(((BytesColumnVector) batch.cols[2]).vector.length,
VectorizedRowBatch.DEFAULT_SIZE);
}
/*
* Test routines to exercise VectorizedRowBatch
* by filling column vectors with data and null values.
*/
public static void setRandom(VectorizedRowBatch batch) {
batch.size = VectorizedRowBatch.DEFAULT_SIZE;
for (int i = 0; i != batch.numCols; i++) {
batch.cols[i] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
setRandomLongCol((LongColumnVector) batch.cols[i]);
}
}
public static void setSample(VectorizedRowBatch batch) {
batch.size = VectorizedRowBatch.DEFAULT_SIZE;
for (int i = 0; i != batch.numCols; i++) {
setSampleLongCol((LongColumnVector) batch.cols[i]);
}
}
/**
* Set to sample data, re-using existing columns in batch.
*
* @param batch
*/
public static void setSampleOverwrite(VectorizedRowBatch batch) {
// Put sample data in the columns.
for (int i = 0; i != batch.numCols; i++) {
setSampleLongCol((LongColumnVector) batch.cols[i]);
}
// Reset the selection vector.
batch.selectedInUse = false;
batch.size = VectorizedRowBatch.DEFAULT_SIZE;
}
/**
* Sprinkle null values in this column vector.
*
* @param col
*/
public static void addRandomNulls(ColumnVector col) {
col.noNulls = false;
Random rand = new Random();
for(int i = 0; i != col.isNull.length; i++) {
col.isNull[i] = Math.abs(rand.nextInt() % 11) == 0;
}
}
/**
* Add null values, but do it faster, by avoiding use of Random().
*
* @param col
*/
public void addSampleNulls(ColumnVector col) {
col.noNulls = false;
assert col.isNull != null;
for(int i = 0; i != col.isNull.length; i++) {
col.isNull[i] = i % 11 == 0;
}
}
public static void addRandomNulls(VectorizedRowBatch batch) {
for (int i = 0; i != batch.numCols; i++) {
addRandomNulls(batch.cols[i]);
}
}
public void addSampleNulls(VectorizedRowBatch batch) {
for (int i = 0; i != batch.numCols; i++) {
addSampleNulls(batch.cols[i]);
}
}
/**
* Set vector elements to sample string data from colorsBytes string table.
* @param col
*/
public static void setSampleStringCol(BytesColumnVector col) {
initColors();
int size = col.vector.length;
for(int i = 0; i != size; i++) {
int pos = i % colorsBytes.length;
col.setRef(i, colorsBytes[pos], 0, colorsBytes[pos].length);
}
}
/*
* Initialize string table in a lazy fashion.
*/
private static void initColors() {
if (colorsBytes == null) {
colorsBytes = new byte[COLORS.length][];
for (int i = 0; i != COLORS.length; i++) {
colorsBytes[i] = COLORS[i].getBytes();
}
}
}
/**
* Set the vector to sample data that repeats an iteration from 0 to 99.
* @param col
*/
public static void setSampleLongCol(LongColumnVector col) {
int size = col.vector.length;
for(int i = 0; i != size; i++) {
col.vector[i] = i % 100;
}
}
/**
* Set the vector to random data in the range 0 to 99.
* This has significant overhead for random number generation. Use setSample() to reduce overhead.
*/
public static void setRandomLongCol(LongColumnVector col) {
int size = col.vector.length;
Random rand = new Random(System.currentTimeMillis());
for(int i = 0; i != size; i++) {
col.vector[i] = Math.abs(rand.nextInt() % 100);
}
}
public static void setRepeatingLongCol(LongColumnVector col) {
col.isRepeating = true;
col.vector[0] = 50;
}
/**
* Set the vector to sample data that repeats an iteration from 0 to 99.
* @param col
*/
public static void setSampleDoubleCol(DoubleColumnVector col) {
int size = col.vector.length;
for(int i = 0; i != size; i++) {
col.vector[i] = i % 100;
}
}
/**
* Set the vector to random data in the range 0 to 99.
* This has significant overhead for random number generation. Use setSample() to reduce overhead.
*/
public static void setRandomDoubleCol(DoubleColumnVector col) {
int size = col.vector.length;
Random rand = new Random();
for(int i = 0; i != size; i++) {
col.vector[i] = Math.abs(rand.nextInt() % 100);
}
}
public static void setRepeatingDoubleCol(DoubleColumnVector col) {
col.isRepeating = true;
col.vector[0] = 50.0;
}
@Test
public void testFlatten() {
verifyFlatten(new LongColumnVector());
verifyFlatten(new DoubleColumnVector());
verifyFlatten(new BytesColumnVector());
}
private void verifyFlatten(ColumnVector v) {
// verify that flattening and unflattenting no-nulls works
v.noNulls = true;
v.isNull[1] = true;
int[] sel = {0, 2};
int size = 2;
v.flatten(true, sel, size);
Assert.assertFalse(v.noNulls);
Assert.assertFalse(v.isNull[0] || v.isNull[2]);
v.unFlatten();
Assert.assertTrue(v.noNulls);
// verify that flattening and unflattening "isRepeating" works
v.isRepeating = true;
v.noNulls = false;
v.isNull[0] = true;
v.flatten(true, sel, 2);
Assert.assertFalse(v.noNulls);
Assert.assertTrue(v.isNull[0] && v.isNull[2]);
Assert.assertFalse(v.isRepeating);
v.unFlatten();
Assert.assertFalse(v.noNulls);
Assert.assertTrue(v.isRepeating);
// verify extension of values in the array
v.noNulls = true;
if (v instanceof LongColumnVector) {
((LongColumnVector) v).vector[0] = 100;
v.flatten(true, sel, 2);
Assert.assertTrue(((LongColumnVector) v).vector[2] == 100);
} else if (v instanceof DoubleColumnVector) {
((DoubleColumnVector) v).vector[0] = 200d;
v.flatten(true, sel, 2);
Assert.assertTrue(((DoubleColumnVector) v).vector[2] == 200d);
} else if (v instanceof BytesColumnVector) {
BytesColumnVector bv = (BytesColumnVector) v;
byte[] b = null;
try {
b = "foo".getBytes("UTF-8");
} catch (Exception e) {
; // eat it
}
bv.setRef(0, b, 0, b.length);
bv.flatten(true, sel, 2);
Assert.assertEquals(bv.vector[0], bv.vector[2]);
Assert.assertEquals(bv.start[0], bv.start[2]);
Assert.assertEquals(bv.length[0], bv.length[2]);
}
}
}