/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Random;
import junit.framework.Assert;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.common.type.RandomTypeUtil;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFHour;
import org.apache.hadoop.hive.ql.udf.UDFMinute;
import org.apache.hadoop.hive.ql.udf.UDFMonth;
import org.apache.hadoop.hive.ql.udf.UDFSecond;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.junit.Test;
/**
* Unit tests for timestamp expressions.
*/
public class TestVectorTimestampExpressions {
/**
 * Formatter shared by encodeTime and decodeTime for the string-typed input column.
 * The pattern has no fractional-second field, so encoded timestamps carry whole
 * seconds only.
 */
private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
/**
 * Collects timestamps around the start of each year in {@code minYear..maxYear}.
 *
 * <p>For every year the calendar is set to January 1st, 00:00:00 and up to three
 * timestamps are emitted: one second before, the instant itself, and one second
 * after. Years that the calendar reports as negative or as 10000 and above are
 * skipped entirely.
 *
 * @param minYear first year to visit (inclusive)
 * @param maxYear last year to visit (inclusive)
 * @return the boundary timestamps in ascending year order
 */
private Timestamp[] getAllBoundaries(int minYear, int maxYear) {
  final long oneSecondMillis = 1000;
  ArrayList<Timestamp> collected = new ArrayList<Timestamp>(1);
  Calendar calendar = Calendar.getInstance();
  // The six-argument set() below leaves the millisecond field alone, so zero it first.
  calendar.setTimeInMillis(0);
  for (int year = minYear; year <= maxYear; year++) {
    calendar.set(year, Calendar.JANUARY, 1, 0, 0, 0);
    final int reportedYear = calendar.get(Calendar.YEAR);
    final boolean inRange = reportedYear >= 0 && reportedYear < 10000;
    if (inRange) {
      final long startOfYear = calendar.getTimeInMillis();
      // NOTE(review): this guard tests minYear, not the current year, so it either
      // keeps or drops the "before" entry for every year at once - confirm intent.
      if (minYear != 0) {
        collected.add(new Timestamp(startOfYear - oneSecondMillis));
      }
      collected.add(new Timestamp(startOfYear));
      // The last year contributes no "after" entry.
      if (year != maxYear) {
        collected.add(new Timestamp(startOfYear + oneSecondMillis));
      }
    }
  }
  return collected.toArray(new Timestamp[collected.size()]);
}
/**
 * Boundary timestamps for the full year range exposed by {@code RandomTypeUtil}.
 *
 * @return the result of the two-argument overload for MIN_YEAR..MAX_YEAR
 */
private Timestamp[] getAllBoundaries() {
  final int firstYear = RandomTypeUtil.MIN_YEAR;
  final int lastYear = RandomTypeUtil.MAX_YEAR;
  return getAllBoundaries(firstYear, lastYear);
}
/**
 * Builds a two-column batch whose column 0 holds {@code size} timestamps drawn from
 * {@code RandomTypeUtil.getRandTimestamp} and whose column 1 is an empty long output.
 *
 * @param seed seed for the random generator, making the batch reproducible
 * @param size number of rows
 * @return the populated batch
 */
private VectorizedRowBatch getVectorizedRandomRowBatchTimestampLong(int seed, int size) {
  final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
  final TimestampColumnVector input = new TimestampColumnVector(size);
  final Random generator = new Random(seed);
  int row = 0;
  while (row < size) {
    input.set(row, RandomTypeUtil.getRandTimestamp(generator));
    row++;
  }
  result.cols[0] = input;
  result.cols[1] = new LongColumnVector(size);
  result.size = size;
  return result;
}
/**
 * Builds a two-column batch whose column 0 holds {@code size} random timestamps in
 * their string encoding (see encodeTime) and whose column 1 is an empty long output.
 *
 * @param seed seed for the random generator, making the batch reproducible
 * @param size number of rows
 * @return the populated batch
 */
private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int size) {
  final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
  final BytesColumnVector input = new BytesColumnVector(size);
  final Random generator = new Random(seed);
  int row = 0;
  while (row < size) {
    // Each row references its own freshly encoded array in full.
    final byte[] text = encodeTime(RandomTypeUtil.getRandTimestamp(generator));
    input.vector[row] = text;
    input.start[row] = 0;
    input.length[row] = text.length;
    row++;
  }
  result.cols[0] = input;
  result.cols[1] = new LongColumnVector(size);
  result.size = size;
  return result;
}
/**
 * Dispatches to the random-batch builder matching the requested input representation.
 *
 * @param seed     seed forwarded to the builder
 * @param size     number of rows
 * @param testType input column representation
 * @return the populated batch
 * @throws IllegalArgumentException for an unhandled {@code testType} constant
 */
private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, TestType testType) {
  final VectorizedRowBatch built;
  switch (testType) {
    case TIMESTAMP_LONG:
      built = getVectorizedRandomRowBatchTimestampLong(seed, size);
      break;
    case STRING_LONG:
      built = getVectorizedRandomRowBatchStringLong(seed, size);
      break;
    default:
      throw new IllegalArgumentException();
  }
  return built;
}
/**
 * Builds a two-column batch whose timestamp input column is filled by cycling through
 * {@code inputs} until {@code size} rows are written; column 1 is an empty long output.
 *
 * @param inputs values to repeat across the batch
 * @param size   number of rows
 * @return the populated batch
 */
private VectorizedRowBatch getVectorizedRowBatchTimestampLong(Timestamp[] inputs, int size) {
  final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
  final TimestampColumnVector input = new TimestampColumnVector(size);
  int row = 0;
  while (row < size) {
    final Timestamp value = inputs[row % inputs.length];
    input.set(row, value);
    row++;
  }
  result.cols[0] = input;
  result.cols[1] = new LongColumnVector(size);
  result.size = size;
  return result;
}
/**
 * Builds a two-column batch whose bytes input column is filled with the string
 * encoding of {@code inputs}, cycling through the array until {@code size} rows are
 * written; column 1 is an empty long output.
 *
 * @param inputs values to encode and repeat across the batch
 * @param size   number of rows
 * @return the populated batch
 */
private VectorizedRowBatch getVectorizedRowBatchStringLong(Timestamp[] inputs, int size) {
  final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
  final BytesColumnVector input = new BytesColumnVector(size);
  int row = 0;
  while (row < size) {
    final byte[] text = encodeTime(inputs[row % inputs.length]);
    input.vector[row] = text;
    input.start[row] = 0;
    input.length[row] = text.length;
    row++;
  }
  result.cols[0] = input;
  result.cols[1] = new LongColumnVector(size);
  result.size = size;
  return result;
}
/**
 * Builds a single-row batch whose bytes input column points at a caller-chosen slice
 * of {@code vector}; column 1 is an empty long output.
 *
 * @param vector backing bytes for row 0
 * @param start  offset of the slice within {@code vector}
 * @param length number of bytes in the slice
 * @return the one-row batch
 */
private VectorizedRowBatch getVectorizedRowBatchStringLong(byte[] vector, int start, int length) {
  final int rows = 1;
  final BytesColumnVector input = new BytesColumnVector(rows);
  input.vector[0] = vector;
  input.start[0] = start;
  input.length[0] = length;

  final VectorizedRowBatch result = new VectorizedRowBatch(2, rows);
  result.cols[0] = input;
  result.cols[1] = new LongColumnVector(rows);
  result.size = rows;
  return result;
}
/**
 * Dispatches to the fixed-input batch builder matching the requested representation.
 *
 * @param inputs   values to repeat across the batch
 * @param size     number of rows
 * @param testType input column representation
 * @return the populated batch
 * @throws IllegalArgumentException for an unhandled {@code testType} constant
 */
private VectorizedRowBatch getVectorizedRowBatch(Timestamp[] inputs, int size, TestType testType) {
  final VectorizedRowBatch built;
  switch (testType) {
    case TIMESTAMP_LONG:
      built = getVectorizedRowBatchTimestampLong(inputs, size);
      break;
    case STRING_LONG:
      built = getVectorizedRowBatchStringLong(inputs, size);
      break;
    default:
      throw new IllegalArgumentException();
  }
  return built;
}
/**
 * Formats a timestamp with {@code dateFormat} and returns the bytes produced by
 * {@code Text.encode} for that string.
 *
 * @param timestamp value to encode
 * @return a new array holding exactly the encoded bytes
 * @throws RuntimeException wrapping any {@link CharacterCodingException}
 */
private byte[] encodeTime(Timestamp timestamp) {
  final Date asDate = new Date(timestamp.getTime());
  final String text = dateFormat.format(asDate);
  final ByteBuffer buffer;
  try {
    buffer = Text.encode(text);
  } catch (CharacterCodingException e) {
    throw new RuntimeException(e);
  }
  // Copy only up to the buffer's limit; the array is trimmed to that length.
  final int usedBytes = buffer.limit();
  return Arrays.copyOf(buffer.array(), usedBytes);
}
/**
 * Inverse of encodeTime: decodes the bytes with {@code Text.decode} and parses the
 * resulting string with {@code dateFormat}.
 *
 * @param time encoded timestamp bytes
 * @return the parsed timestamp
 * @throws RuntimeException wrapping any failure while decoding or parsing
 */
private Timestamp decodeTime(byte[] time) {
  final long millis;
  try {
    final String text = Text.decode(time);
    final Date parsed = dateFormat.parse(text);
    millis = parsed.getTime();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  return new Timestamp(millis);
}
/**
 * Reads row {@code i} of an input column back as a timestamp.
 *
 * <p>Timestamp columns are read directly. Bytes columns are decoded from the slice
 * described by {@code start[i]} and {@code length[i]}, not from the whole backing
 * array, so rows that reference only part of a buffer are read correctly.
 *
 * @param col input column, either a TimestampColumnVector or a BytesColumnVector
 * @param i   row index
 * @return the timestamp stored at that row
 * @throws IllegalArgumentException if the column is of any other type
 */
private Timestamp readVectorElementAt(ColumnVector col, int i) {
  if (col instanceof TimestampColumnVector) {
    return ((TimestampColumnVector) col).asScratchTimestamp(i);
  }
  if (col instanceof BytesColumnVector) {
    final BytesColumnVector bytesCol = (BytesColumnVector) col;
    final int from = bytesCol.start[i];
    final int to = from + bytesCol.length[i];
    // Honour the (start, length) window; the backing array may be larger than the value.
    final byte[] timeBytes = Arrays.copyOfRange(bytesCol.vector[i], from, to);
    return decodeTime(timeBytes);
  }
  throw new IllegalArgumentException("Unsupported input column type: " + col);
}
/** Representation used for the input column (column 0) of a test batch. */
private enum TestType {
  /** Input column is a TimestampColumnVector. */
  TIMESTAMP_LONG,
  /** Input column is a BytesColumnVector of formatted timestamp strings. */
  STRING_LONG
}
/**
 * Asserts that {@code y} equals the year computed by the row-mode {@code UDFYear}
 * for {@code t}, printing both values to stdout first when they differ.
 *
 * @param t input timestamp
 * @param y year produced by the vectorized expression
 */
private void compareToUDFYearLong(Timestamp t, int y) {
  final UDFYear reference = new UDFYear();
  final TimestampWritable writable = new TimestampWritable(t);
  final IntWritable expected = reference.evaluate(writable);
  final boolean matches = expected.get() == y;
  if (!matches) {
    System.out.printf("%d vs %d for %s, %d\n", expected.get(), y, t.toString(),
        writable.getTimestamp().getTime()/1000);
  }
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized year expression on {@code batch} (column 0 in, column 1
 * out) and checks every row against the row-mode UDF.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFYearTimestamp(0, 1);
  } else {
    expression = new VectorUDFYearString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    if (!input.noNulls) {
      // Column has nulls elsewhere: this row's output null flag must match the input's.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFYearLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFYear through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFYear(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFYear(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFYear(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFYear(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFYear(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFYear(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFYear(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFYear(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFYear(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFYear(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFYear(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFYear(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFYear(batch, testType);
}
/** Runs the year scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFYearTimestamp() {
testVectorUDFYear(TestType.TIMESTAMP_LONG);
}
/**
 * Runs the year scenarios with a string-typed input column, then checks that an
 * unparseable slice yields a null output while the input stays non-null.
 */
@Test
public void testVectorUDFYearString() {
  testVectorUDFYear(TestType.STRING_LONG);

  // Bytes "22013" with start 1 and length 3 select the slice "201".
  final byte[] raw = new byte[] {'2', '2', '0', '1', '3'};
  final VectorizedRowBatch batch = getVectorizedRowBatchStringLong(raw, 1, 3);
  final VectorExpression expression = new VectorUDFYearString(0, 1);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final LongColumnVector output = (LongColumnVector) batch.cols[1];
  Assert.assertEquals(false, input.isNull[0]);
  Assert.assertEquals(true, output.isNull[0]);
}
/**
 * Asserts that {@code y} equals the day of month computed by the row-mode
 * {@code UDFDayOfMonth} for {@code t}.
 *
 * @param t input timestamp
 * @param y value produced by the vectorized expression
 */
private void compareToUDFDayOfMonthLong(Timestamp t, int y) {
  final UDFDayOfMonth reference = new UDFDayOfMonth();
  final IntWritable expected = reference.evaluate(new TimestampWritable(t));
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized day-of-month expression on {@code batch} (column 0 in,
 * column 1 out) and checks every row against the row-mode UDF.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFDayOfMonthTimestamp(0, 1);
  } else {
    expression = new VectorUDFDayOfMonthString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    if (!input.noNulls) {
      // Column has nulls elsewhere: this row's output null flag must match the input's.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFDayOfMonthLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFDayOfMonth through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFDayOfMonth(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFDayOfMonth(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFDayOfMonth(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFDayOfMonth(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFDayOfMonth(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFDayOfMonth(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFDayOfMonth(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFDayOfMonth(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFDayOfMonth(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFDayOfMonth(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFDayOfMonth(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFDayOfMonth(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFDayOfMonth(batch, testType);
}
/** Runs the day-of-month scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFDayOfMonthTimestamp() {
testVectorUDFDayOfMonth(TestType.TIMESTAMP_LONG);
}
/** Runs the day-of-month scenarios with a string-typed input column. */
@Test
public void testVectorUDFDayOfMonthString() {
testVectorUDFDayOfMonth(TestType.STRING_LONG);
}
/**
 * Asserts that {@code y} equals the hour computed by the row-mode {@code UDFHour}
 * for {@code t}.
 *
 * @param t input timestamp
 * @param y value produced by the vectorized expression
 */
private void compareToUDFHourLong(Timestamp t, int y) {
  final UDFHour reference = new UDFHour();
  final IntWritable expected = reference.evaluate(new TimestampWritable(t));
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized hour expression on {@code batch} (column 0 in, column 1
 * out) and checks every row against the row-mode UDF.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFHourTimestamp(0, 1);
  } else {
    expression = new VectorUDFHourString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    if (!input.noNulls) {
      // Column has nulls elsewhere: this row's output null flag must match the input's.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFHourLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFHour through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFHour(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFHour(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFHour(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFHour(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFHour(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFHour(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFHour(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFHour(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFHour(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFHour(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFHour(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFHour(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFHour(batch, testType);
}
/** Runs the hour scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFHourTimestamp() {
testVectorUDFHour(TestType.TIMESTAMP_LONG);
}
/** Runs the hour scenarios with a string-typed input column. */
@Test
public void testVectorUDFHourString() {
testVectorUDFHour(TestType.STRING_LONG);
}
/**
 * Asserts that {@code y} equals the minute computed by the row-mode
 * {@code UDFMinute} for {@code t}.
 *
 * @param t input timestamp
 * @param y value produced by the vectorized expression
 */
private void compareToUDFMinuteLong(Timestamp t, int y) {
  final UDFMinute reference = new UDFMinute();
  final IntWritable expected = reference.evaluate(new TimestampWritable(t));
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized minute expression on {@code batch} (column 0 in, column 1
 * out) and checks every row against the row-mode UDF.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFMinuteTimestamp(0, 1);
  } else {
    expression = new VectorUDFMinuteString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    if (!input.noNulls) {
      // Column has nulls elsewhere: this row's output null flag must match the input's.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFMinuteLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFMinute through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFMinute(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFMinute(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFMinute(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFMinute(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFMinute(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFMinute(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFMinute(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFMinute(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFMinute(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFMinute(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFMinute(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFMinute(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFMinute(batch, testType);
}
/** Runs the minute scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFMinuteLong() {
testVectorUDFMinute(TestType.TIMESTAMP_LONG);
}
/** Runs the minute scenarios with a string-typed input column. */
@Test
public void testVectorUDFMinuteString() {
testVectorUDFMinute(TestType.STRING_LONG);
}
/**
 * Asserts that {@code y} equals the month computed by the row-mode {@code UDFMonth}
 * for {@code t}.
 *
 * @param t input timestamp
 * @param y value produced by the vectorized expression
 */
private void compareToUDFMonthLong(Timestamp t, int y) {
  final UDFMonth reference = new UDFMonth();
  final IntWritable expected = reference.evaluate(new TimestampWritable(t));
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized month expression on {@code batch} (column 0 in, column 1
 * out) and checks every row against the row-mode UDF.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFMonthTimestamp(0, 1);
  } else {
    expression = new VectorUDFMonthString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    if (!input.noNulls) {
      // Column has nulls elsewhere: this row's output null flag must match the input's.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFMonthLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFMonth through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFMonth(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFMonth(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFMonth(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFMonth(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFMonth(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFMonth(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFMonth(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFMonth(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFMonth(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFMonth(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFMonth(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFMonth(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFMonth(batch, testType);
}
/** Runs the month scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFMonthTimestamp() {
testVectorUDFMonth(TestType.TIMESTAMP_LONG);
}
/** Runs the month scenarios with a string-typed input column. */
@Test
public void testVectorUDFMonthString() {
testVectorUDFMonth(TestType.STRING_LONG);
}
/**
 * Asserts that {@code y} equals the second computed by the row-mode
 * {@code UDFSecond} for {@code t}.
 *
 * @param t input timestamp
 * @param y value produced by the vectorized expression
 */
private void compareToUDFSecondLong(Timestamp t, int y) {
  final UDFSecond reference = new UDFSecond();
  final IntWritable expected = reference.evaluate(new TimestampWritable(t));
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized second expression on {@code batch} (column 0 in, column 1
 * out) and checks every row against the row-mode UDF.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFSecondTimestamp(0, 1);
  } else {
    expression = new VectorUDFSecondString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    if (!input.noNulls) {
      // Column has nulls elsewhere: this row's output null flag must match the input's.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFSecondLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFSecond through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFSecond(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFSecond(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFSecond(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFSecond(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFSecond(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFSecond(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFSecond(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFSecond(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFSecond(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFSecond(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFSecond(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFSecond(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFSecond(batch, testType);
}
/** Runs the second scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFSecondLong() {
testVectorUDFSecond(TestType.TIMESTAMP_LONG);
}
/** Runs the second scenarios with a string-typed input column. */
@Test
public void testVectorUDFSecondString() {
testVectorUDFSecond(TestType.STRING_LONG);
}
/**
 * Fails the test unless {@code y} equals the timestamp's epoch milliseconds divided
 * by 1000 (integer division).
 *
 * <p>The mismatch details are carried in the assertion failure message rather than
 * printed to stdout ahead of a bare {@code assertTrue(false)}.
 *
 * @param ts input timestamp
 * @param y  seconds value produced by the vectorized expression
 */
private void compareToUDFUnixTimeStampLong(Timestamp ts, long y) {
  final long seconds = ts.getTime() / 1000;
  if (seconds != y) {
    Assert.fail(String.format("%d vs %d for %s", seconds, y, ts.toString()));
  }
}
/**
 * Evaluates the vectorized unix-timestamp expression on {@code batch} (column 0 in,
 * column 1 out) and checks every row against the expected seconds value.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFUnixTimeStampTimestamp(0, 1);
  } else {
    expression = new VectorUDFUnixTimeStampString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    // NOTE(review): this guard looks at the OUTPUT column's noNulls flag, whereas the
    // sibling verify methods look at the input column's - confirm which is intended.
    if (!output.noNulls) {
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFUnixTimeStampLong(value, actual);
  }
}
/**
 * Drives verifyUDFUnixTimeStamp through a fixed sequence of batches for the given
 * input representation. Each scenario mutates its batch in place between
 * verifications, so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFUnixTimeStamp(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFUnixTimeStamp(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFUnixTimeStamp(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFUnixTimeStamp(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFUnixTimeStamp(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFUnixTimeStamp(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFUnixTimeStamp(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFUnixTimeStamp(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFUnixTimeStamp(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFUnixTimeStamp(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFUnixTimeStamp(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFUnixTimeStamp(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFUnixTimeStamp(batch, testType);
}
/** Runs the unix-timestamp scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFUnixTimeStampTimestamp() {
testVectorUDFUnixTimeStamp(TestType.TIMESTAMP_LONG);
}
/** Runs the unix-timestamp scenarios with a string-typed input column. */
@Test
public void testVectorUDFUnixTimeStampString() {
testVectorUDFUnixTimeStamp(TestType.STRING_LONG);
}
/**
 * Asserts that {@code y} equals the week of year computed by the row-mode
 * {@code UDFWeekOfYear} for {@code t}.
 *
 * @param t input timestamp
 * @param y value produced by the vectorized expression
 */
private void compareToUDFWeekOfYearLong(Timestamp t, int y) {
  final UDFWeekOfYear reference = new UDFWeekOfYear();
  final IntWritable expected = reference.evaluate(new TimestampWritable(t));
  Assert.assertEquals(expected.get(), y);
}
/**
 * Evaluates the vectorized week-of-year expression on {@code batch} (column 0 in,
 * column 1 out) and checks every row against the row-mode UDF.
 *
 * <p>Unlike the sibling verify methods, non-null rows get no extra null-flag
 * comparison here; only their value is checked.
 *
 * @param batch    batch to evaluate; its output column is overwritten
 * @param testType selects the timestamp or string variant of the expression
 */
private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) {
  final boolean timestampInput = (testType == TestType.TIMESTAMP_LONG);
  final VectorExpression expression;
  if (timestampInput) {
    expression = new VectorUDFWeekOfYearTimestamp(0, 1);
  } else {
    expression = new VectorUDFWeekOfYearString(0, 1);
  }
  expression.setInputTypes(
      timestampInput ? VectorExpression.Type.TIMESTAMP : VectorExpression.Type.STRING);
  expression.evaluate(batch);

  final ColumnVector input = batch.cols[0];
  final ColumnVector output = batch.cols[1];
  for (int row = 0; row < batch.size; row++) {
    final boolean inputIsNull = !input.noNulls && input.isNull[row];
    if (inputIsNull) {
      // A null input row must be flagged null in the output as well.
      Assert.assertEquals(output.isNull[row], input.isNull[row]);
      continue;
    }
    final Timestamp value = readVectorElementAt(input, row);
    final long actual = ((LongColumnVector) output).vector[row];
    compareToUDFWeekOfYearLong(value, (int) actual);
  }
}
/**
 * Drives verifyUDFWeekOfYear through a fixed sequence of batches for the given input
 * representation. Each scenario mutates its batch in place between verifications,
 * so the statement order is significant.
 *
 * @param testType whether column 0 holds timestamps or their string encoding
 */
private void testVectorUDFWeekOfYear(TestType testType) {
// Scenario 1: default-size batch in which every row is new Timestamp(0).
VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)},
VectorizedRowBatch.DEFAULT_SIZE, testType);
// Sanity-check the freshly built output column before any evaluation.
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
verifyUDFWeekOfYear(batch, testType);
// Re-verify after addRandomNulls has been applied to the input column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFWeekOfYear(batch, testType);
// Scenario 2: one row per year-boundary timestamp.
Timestamp[] boundaries = getAllBoundaries();
batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
verifyUDFWeekOfYear(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFWeekOfYear(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFWeekOfYear(batch, testType);
// Scenario 3: single-row batch with the input flagged as repeating.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
verifyUDFWeekOfYear(batch, testType);
// ... then with the repeated value marked null.
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFWeekOfYear(batch, testType);
// Scenario 4: as scenario 3, but with selectedInUse set and a selection vector of {42}.
batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType);
batch.cols[0].isRepeating = true;
batch.selectedInUse = true;
batch.selected = new int[] {42};
verifyUDFWeekOfYear(batch, testType);
batch.cols[0].noNulls = false;
batch.cols[0].isNull[0] = true;
verifyUDFWeekOfYear(batch, testType);
// Scenario 5: default-size batch of random timestamps generated from seed 200.
batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
verifyUDFWeekOfYear(batch, testType);
// ... then with addRandomNulls applied to the input column, then to the output column.
TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
verifyUDFWeekOfYear(batch, testType);
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
verifyUDFWeekOfYear(batch, testType);
}
/** Runs the week-of-year scenarios with a timestamp-typed input column. */
@Test
public void testVectorUDFWeekOfYearTimestamp() {
testVectorUDFWeekOfYear(TestType.TIMESTAMP_LONG);
}
/** Runs the week-of-year scenarios with a string-typed input column. */
@Test
public void testVectorUDFWeekOfYearString() {
testVectorUDFWeekOfYear(TestType.STRING_LONG);
}
/**
 * Runs every test in this class without a JUnit runner, timestamp-input variants
 * first and string-input variants second. The first failing assertion propagates
 * out of {@code main}.
 *
 * <p>The minute and second tests are included so that this entry point covers the
 * same set of tests as the {@code @Test} methods.
 *
 * @param args unused
 */
public static void main(String[] args) {
  TestVectorTimestampExpressions self = new TestVectorTimestampExpressions();
  // Timestamp-typed input column.
  self.testVectorUDFYearTimestamp();
  self.testVectorUDFMonthTimestamp();
  self.testVectorUDFDayOfMonthTimestamp();
  self.testVectorUDFHourTimestamp();
  self.testVectorUDFMinuteLong();
  self.testVectorUDFSecondLong();
  self.testVectorUDFWeekOfYearTimestamp();
  self.testVectorUDFUnixTimeStampTimestamp();
  // String-typed input column.
  self.testVectorUDFYearString();
  self.testVectorUDFMonthString();
  self.testVectorUDFDayOfMonthString();
  self.testVectorUDFHourString();
  self.testVectorUDFMinuteString();
  self.testVectorUDFSecondString();
  self.testVectorUDFWeekOfYearString();
  self.testVectorUDFUnixTimeStampString();
}
}