/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec.vector.expressions; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.UnsupportedEncodingException; import java.math.BigDecimal; import java.math.MathContext; import java.math.RoundingMode; import java.sql.Timestamp; import java.util.Arrays; import java.util.Random; import java.util.concurrent.TimeUnit; import junit.framework.Assert; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.expressions.*; import org.apache.hadoop.hive.ql.util.TimestampUtils; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.junit.Test; /** * Test VectorExpression classes for vectorized implementations of type casts. */ public class TestVectorTypeCasts { @Test public void testVectorCastLongToDouble() { VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut(); DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastLongToDouble(0, 1); expr.evaluate(b); Assert.assertEquals(2.0, resultV.vector[4]); } @Test public void testVectorCastDoubleToLong() { VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut(); LongColumnVector resultV = (LongColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastDoubleToLong(0, 1); expr.evaluate(b); Assert.assertEquals(1, resultV.vector[6]); } @Test public void testCastDoubleToBoolean() { VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut(); LongColumnVector resultV = (LongColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastDoubleToBooleanViaDoubleToLong(0, 1); expr.evaluate(b); Assert.assertEquals(0, resultV.vector[3]); Assert.assertEquals(1, resultV.vector[4]); } @Test public void testCastDoubleToTimestamp() { VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInTimestampOut(); TimestampColumnVector resultV = (TimestampColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastDoubleToTimestamp(0, 1); expr.evaluate(b); Assert.assertEquals(0.0, TimestampUtils.getDouble(resultV.asScratchTimestamp(3))); Assert.assertEquals(0.5d, TimestampUtils.getDouble(resultV.asScratchTimestamp(4))); } @Test public void testCastLongToBoolean() { VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut(); LongColumnVector inV = (LongColumnVector) b.cols[0]; inV.vector[0] = 0; // make one entry produce false in result LongColumnVector resultV = (LongColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastLongToBooleanViaLongToLong(0, 1); expr.evaluate(b); Assert.assertEquals(0, resultV.vector[0]); Assert.assertEquals(1, resultV.vector[1]); } @Test public void testCastLongToTimestamp() { long[] longValues = new long[500]; VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInTimestampOut(longValues); TimestampColumnVector resultV = (TimestampColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastLongToTimestamp(0, 1); expr.evaluate(b); for (int i = 0; i < longValues.length; i++) { Timestamp timestamp = resultV.asScratchTimestamp(i); long actual = TimestampWritable.getLong(timestamp); assertEquals(actual, longValues[i]); } } @Test public void testCastTimestampToLong() { long[] longValues = new long[500]; VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchTimestampInLongOut(longValues); TimestampColumnVector inV = (TimestampColumnVector) b.cols[0]; LongColumnVector resultV = (LongColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastTimestampToLong(0, 1); expr.evaluate(b); for (int i = 0; i < longValues.length; i++) { long actual = resultV.vector[i]; long timestampLong = inV.getTimestampAsLong(i); if (actual != timestampLong) { assertTrue(false); } } } @Test public void testCastTimestampToDouble() { double[] doubleValues = new double[500]; VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchTimestampInDoubleOut(doubleValues); TimestampColumnVector inV = (TimestampColumnVector) b.cols[0]; DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; b.cols[0].noNulls = true; VectorExpression expr = new CastTimestampToDouble(0, 1); expr.evaluate(b); for (int i = 0; i < doubleValues.length; i++) { double actual = resultV.vector[i]; double doubleValue = TimestampUtils.getDouble(inV.asScratchTimestamp(i)); assertEquals(actual, doubleValue, 0.000000001F); } } public byte[] toBytes(String s) { byte[] b = null; try { b = s.getBytes("UTF-8"); } catch (Exception e) { throw new RuntimeException("Could not convert string to UTF-8 byte array."); } return b; } @Test public void testCastLongToString() { VectorizedRowBatch b = TestVectorMathFunctions.getBatchForStringMath(); BytesColumnVector resultV = (BytesColumnVector) b.cols[2]; b.cols[1].noNulls = true; VectorExpression expr = new CastLongToString(1, 2); expr.evaluate(b); byte[] num255 = toBytes("255"); Assert.assertEquals(0, StringExpr.compare(num255, 0, num255.length, resultV.vector[1], resultV.start[1], resultV.length[1])); } @Test public void testCastBooleanToString() { byte[] t = toBytes("TRUE"); byte[] f = toBytes("FALSE"); VectorizedRowBatch b = TestVectorMathFunctions.getBatchForStringMath(); LongColumnVector inV = (LongColumnVector) b.cols[1]; BytesColumnVector resultV = (BytesColumnVector) b.cols[2]; inV.vector[1] = 1; VectorExpression expr = new CastBooleanToStringViaLongToString(1, 2); expr.evaluate(b); Assert.assertEquals(0, StringExpr.compare(f, 0, f.length, resultV.vector[0], resultV.start[0], resultV.length[0])); Assert.assertEquals(0, StringExpr.compare(t, 0, t.length, resultV.vector[1], resultV.start[1], resultV.length[1])); } @Test public void testCastDecimalToLong() { // test basic case VectorizedRowBatch b = getBatchDecimalLong(); VectorExpression expr = new CastDecimalToLong(0, 1); // With the integer type range checking, we need to know the Hive data type. expr.setOutputType("bigint"); expr.evaluate(b); LongColumnVector r = (LongColumnVector) b.cols[1]; assertEquals(1, r.vector[0]); assertEquals(-2, r.vector[1]); assertEquals(9999999999999999L, r.vector[2]); // test with nulls in input b = getBatchDecimalLong(); b.cols[0].noNulls = false; b.cols[0].isNull[1] = true; expr.evaluate(b); r = (LongColumnVector) b.cols[1]; assertFalse(r.noNulls); assertTrue(r.isNull[1]); assertFalse(r.isNull[0]); assertEquals(1, r.vector[0]); // test repeating case b = getBatchDecimalLong(); b.cols[0].isRepeating = true; expr.evaluate(b); r = (LongColumnVector) b.cols[1]; assertTrue(r.isRepeating); assertEquals(1, r.vector[0]); // test repeating nulls case b = getBatchDecimalLong(); b.cols[0].isRepeating = true; b.cols[0].noNulls = false; b.cols[0].isNull[0] = true; expr.evaluate(b); r = (LongColumnVector) b.cols[1]; assertTrue(r.isRepeating); assertTrue(r.isNull[0]); } @Test /* Just spot check the basic case because code path is the same as * for cast of decimal to long due to inheritance. */ public void testCastDecimalToBoolean() { VectorizedRowBatch b = getBatchDecimalLong(); VectorExpression expr = new CastDecimalToBoolean(0, 1); DecimalColumnVector in = (DecimalColumnVector) b.cols[0]; in.vector[1].set(HiveDecimal.create(0)); expr.evaluate(b); LongColumnVector r = (LongColumnVector) b.cols[1]; assertEquals(1, r.vector[0]); assertEquals(0, r.vector[1]); assertEquals(1, r.vector[2]); } private VectorizedRowBatch getBatchDecimalLong() { VectorizedRowBatch b = new VectorizedRowBatch(2); DecimalColumnVector dv; short scale = 2; b.cols[0] = dv = new DecimalColumnVector(18, scale); b.cols[1] = new LongColumnVector(); b.size = 3; dv.vector[0].set(HiveDecimal.create("1.1")); dv.vector[1].set(HiveDecimal.create("-2.2")); dv.vector[2].set(HiveDecimal.create("9999999999999999.00")); return b; } @Test public void testCastDecimalToDouble() { final double eps = 0.000001d; // tolerance to check double equality // test basic case VectorizedRowBatch b = getBatchDecimalDouble(); VectorExpression expr = new CastDecimalToDouble(0, 1); expr.evaluate(b); DoubleColumnVector r = (DoubleColumnVector) b.cols[1]; assertEquals(1.1d, r.vector[0], eps); assertEquals(-2.2d, r.vector[1], eps); assertEquals(9999999999999999.0d, r.vector[2], eps); // test with nulls in input b = getBatchDecimalDouble(); b.cols[0].noNulls = false; b.cols[0].isNull[1] = true; expr.evaluate(b); r = (DoubleColumnVector) b.cols[1]; assertFalse(r.noNulls); assertTrue(r.isNull[1]); assertFalse(r.isNull[0]); assertEquals(1.1d, r.vector[0], eps); // test repeating case b = getBatchDecimalDouble(); b.cols[0].isRepeating = true; expr.evaluate(b); r = (DoubleColumnVector) b.cols[1]; assertTrue(r.isRepeating); assertEquals(1.1d, r.vector[0], eps); // test repeating nulls case b = getBatchDecimalDouble(); b.cols[0].isRepeating = true; b.cols[0].noNulls = false; b.cols[0].isNull[0] = true; expr.evaluate(b); r = (DoubleColumnVector) b.cols[1]; assertTrue(r.isRepeating); assertTrue(r.isNull[0]); } private VectorizedRowBatch getBatchDecimalDouble() { VectorizedRowBatch b = new VectorizedRowBatch(2); DecimalColumnVector dv; short scale = 2; b.cols[0] = dv = new DecimalColumnVector(18, scale); b.cols[1] = new DoubleColumnVector(); b.size = 3; dv.vector[0].set(HiveDecimal.create("1.1")); dv.vector[1].set(HiveDecimal.create("-2.2")); dv.vector[2].set(HiveDecimal.create("9999999999999999.00")); return b; } @Test public void testCastDecimalToString() { VectorizedRowBatch b = getBatchDecimalString(); VectorExpression expr = new CastDecimalToString(0, 1); expr.evaluate(b); BytesColumnVector r = (BytesColumnVector) b.cols[1]; // As of HIVE-8745, these decimal values should be trimmed of trailing zeros. byte[] v = toBytes("1.1"); assertTrue(((Integer) v.length).toString() + " " + r.length[0], v.length == r.length[0]); Assert.assertEquals(0, StringExpr.compare(v, 0, v.length, r.vector[0], r.start[0], r.length[0])); v = toBytes("-2.2"); Assert.assertEquals(0, StringExpr.compare(v, 0, v.length, r.vector[1], r.start[1], r.length[1])); v = toBytes("9999999999999999"); Assert.assertEquals(0, StringExpr.compare(v, 0, v.length, r.vector[2], r.start[2], r.length[2])); } private VectorizedRowBatch getBatchDecimalString() { VectorizedRowBatch b = new VectorizedRowBatch(2); DecimalColumnVector dv; short scale = 2; b.cols[0] = dv = new DecimalColumnVector(18, scale); b.cols[1] = new BytesColumnVector(); b.size = 3; dv.vector[0].set(HiveDecimal.create("1.1")); dv.vector[1].set(HiveDecimal.create("-2.2")); dv.vector[2].set(HiveDecimal.create("9999999999999999.00")); return b; } @Test public void testCastDecimalToTimestamp() { double[] doubleValues = new double[500]; VectorizedRowBatch b = getBatchDecimalTimestamp(doubleValues); VectorExpression expr = new CastDecimalToTimestamp(0, 1); expr.evaluate(b); TimestampColumnVector r = (TimestampColumnVector) b.cols[1]; for (int i = 0; i < doubleValues.length; i++) { Timestamp timestamp = r.asScratchTimestamp(i); double asDouble = TimestampUtils.getDouble(timestamp); double expectedDouble = doubleValues[i]; if (expectedDouble != asDouble) { assertTrue(false); } } } private VectorizedRowBatch getBatchDecimalLong2() { VectorizedRowBatch b = new VectorizedRowBatch(2); DecimalColumnVector dv; short scale = 9; b.cols[0] = dv = new DecimalColumnVector(18, scale); b.cols[1] = new LongColumnVector(); b.size = 3; dv.vector[0].set(HiveDecimal.create("1.111111111")); dv.vector[1].set(HiveDecimal.create("-2.222222222")); dv.vector[2].set(HiveDecimal.create("31536000.999999999")); return b; } private VectorizedRowBatch getBatchDecimalTimestamp(double[] doubleValues) { VectorizedRowBatch b = new VectorizedRowBatch(2); DecimalColumnVector dv; b.cols[0] = dv = new DecimalColumnVector(doubleValues.length, HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); b.cols[1] = new TimestampColumnVector(doubleValues.length); dv.noNulls = true; Random r = new Random(94830); for (int i = 0; i < doubleValues.length; i++) { long millis = RandomTypeUtil.randomMillis(r); Timestamp ts = new Timestamp(millis); int nanos = RandomTypeUtil.randomNanos(r); ts.setNanos(nanos); TimestampWritable tsw = new TimestampWritable(ts); double asDouble = tsw.getDouble(); doubleValues[i] = asDouble; HiveDecimal hiveDecimal = HiveDecimal.create(new BigDecimal(asDouble)); dv.set(i, hiveDecimal); } b.size = doubleValues.length; return b; } @Test public void testCastLongToDecimal() { VectorizedRowBatch b = getBatchLongDecimal(); VectorExpression expr = new CastLongToDecimal(0, 1); expr.evaluate(b); DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; assertTrue(r.vector[0].getHiveDecimal().equals(HiveDecimal.create("0"))); assertTrue(r.vector[1].getHiveDecimal().equals(HiveDecimal.create("-1"))); assertTrue(r.vector[2].getHiveDecimal().equals(HiveDecimal.create("99999999999999"))); } private VectorizedRowBatch getBatchLongDecimal() { VectorizedRowBatch b = new VectorizedRowBatch(2); LongColumnVector lv; b.cols[0] = lv = new LongColumnVector(); b.cols[1] = new DecimalColumnVector(18, 2); lv.vector[0] = 0; lv.vector[1] = -1; lv.vector[2] = 99999999999999L; return b; } public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1); public static final long NANOSECONDS_PER_MILLISSECOND = TimeUnit.MILLISECONDS.toNanos(1); private VectorizedRowBatch getBatchTimestampDecimal(HiveDecimal[] hiveDecimalValues) { Random r = new Random(994); VectorizedRowBatch b = new VectorizedRowBatch(2); TimestampColumnVector tcv; b.cols[0] = tcv = new TimestampColumnVector(hiveDecimalValues.length); b.cols[1] = new DecimalColumnVector(hiveDecimalValues.length, HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); for (int i = 0; i < hiveDecimalValues.length; i++) { int optionalNanos = 0; switch (r.nextInt(4)) { case 0: // No nanos. break; case 1: optionalNanos = r.nextInt((int) NANOSECONDS_PER_SECOND); break; case 2: // Limit to milliseconds only... optionalNanos = r.nextInt((int) MILLISECONDS_PER_SECOND) * (int) NANOSECONDS_PER_MILLISSECOND; break; case 3: // Limit to below milliseconds only... optionalNanos = r.nextInt((int) NANOSECONDS_PER_MILLISSECOND); break; } long millis = RandomTypeUtil.randomMillis(r); Timestamp ts = new Timestamp(millis); ts.setNanos(optionalNanos); TimestampWritable tsw = new TimestampWritable(ts); hiveDecimalValues[i] = tsw.getHiveDecimal(); tcv.set(i, ts); } b.size = hiveDecimalValues.length; return b; } @Test public void testCastDoubleToDecimal() { VectorizedRowBatch b = getBatchDoubleDecimal(); VectorExpression expr = new CastDoubleToDecimal(0, 1); expr.evaluate(b); DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; HiveDecimal hd0 = HiveDecimal.create("0.0"); if (!hd0.equals(r.vector[0].getHiveDecimal())) { assertTrue(false); } HiveDecimal hd1 = HiveDecimal.create("-1.0"); if (!hd1.equals(r.vector[1].getHiveDecimal())) { assertTrue(false); } HiveDecimal hd2 = HiveDecimal.create("99999999999999"); if (!hd2.equals(r.vector[2].getHiveDecimal())) { assertTrue(false); } } private VectorizedRowBatch getBatchDoubleDecimal() { VectorizedRowBatch b = new VectorizedRowBatch(2); DoubleColumnVector dv; short scale = 2; b.cols[0] = dv = new DoubleColumnVector(); b.cols[1] = new DecimalColumnVector(18, scale); b.size = 3; dv.vector[0] = 0d; dv.vector[1] = -1d; dv.vector[2] = 99999999999999.0d; return b; } @Test public void testCastStringToDecimal() { VectorizedRowBatch b = getBatchStringDecimal(); VectorExpression expr = new CastStringToDecimal(0, 1); expr.evaluate(b); DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; assertTrue(r.vector[0].getHiveDecimal().equals(HiveDecimal.create("1.10"))); assertTrue(r.vector[1].getHiveDecimal().equals(HiveDecimal.create("-2.20"))); assertTrue(r.vector[2].getHiveDecimal().equals(HiveDecimal.create("99999999999999.0"))); } private VectorizedRowBatch getBatchStringDecimal() { VectorizedRowBatch b = new VectorizedRowBatch(2); BytesColumnVector bv; b.cols[0] = bv = new BytesColumnVector(); b.cols[1] = new DecimalColumnVector(18, 2); bv.initBuffer(); byte[] x0 = toBytes("1.10"); byte[] x1 = toBytes("-2.20"); byte[] x2 = toBytes("99999999999999.0"); bv.setVal(0, x0, 0, x0.length); bv.setVal(1, x1, 0, x1.length); bv.setVal(2, x2, 0, x2.length); return b; } @Test public void testCastTimestampToDecimal() { // The input timestamps are stored as long values // measured in nanoseconds from the epoch. HiveDecimal[] hiveDecimalValues = new HiveDecimal[500]; VectorizedRowBatch b = getBatchTimestampDecimal(hiveDecimalValues); VectorExpression expr = new CastTimestampToDecimal(0, 1); TimestampColumnVector inT = (TimestampColumnVector) b.cols[0]; expr.evaluate(b); DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; for (int i = 0; i < hiveDecimalValues.length; i++) { HiveDecimal hiveDecimal = r.vector[i].getHiveDecimal(); HiveDecimal expectedHiveDecimal = hiveDecimalValues[i]; if (!hiveDecimal.equals(expectedHiveDecimal)) { assertTrue(false); } } // Try again with a value that won't fit in 5 digits, to make // sure that NULL is produced. b.cols[1] = r = new DecimalColumnVector(hiveDecimalValues.length, 5, 2); expr.evaluate(b); r = (DecimalColumnVector) b.cols[1]; for (int i = 0; i < hiveDecimalValues.length; i++) { HiveDecimal hiveDecimal = r.vector[i].getHiveDecimal(); HiveDecimal expectedHiveDecimal = hiveDecimalValues[i]; if (HiveDecimal.enforcePrecisionScale(expectedHiveDecimal, 5, 2) == null) { assertTrue(r.isNull[i]); } else { assertTrue(!r.isNull[i]); if (!hiveDecimal.equals(expectedHiveDecimal)) { assertTrue(false); } } } } /* This batch has output decimal column precision 5 and scale 2. * The goal is to allow testing of input long values that, when * converted to decimal, will not fit in the given precision. * Then it will be possible to check that the results are NULL. */ private VectorizedRowBatch getBatchLongDecimalPrec5Scale2() { VectorizedRowBatch b = new VectorizedRowBatch(2); LongColumnVector lv; b.cols[0] = lv = new LongColumnVector(); b.cols[1] = new DecimalColumnVector(5, 2); lv.vector[0] = 0; lv.vector[1] = -1; lv.vector[2] = 99999999999999L; return b; } private VectorizedRowBatch getBatchDecimalDecimal() { VectorizedRowBatch b = new VectorizedRowBatch(2); DecimalColumnVector v0, v1; b.cols[0] = v0 = new DecimalColumnVector(18, 4); b.cols[1] = v1 = new DecimalColumnVector(5, 2); v0.vector[0].set(HiveDecimal.create("10.0001")); v0.vector[1].set(HiveDecimal.create("-9999999.9999")); v1.vector[0].set(HiveDecimal.create("100.01")); v1.vector[1].set(HiveDecimal.create("-200.02")); b.size = 2; return b; } }