/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.exec.vector.util; import java.sql.Timestamp; import java.util.Random; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; public class VectorizedRowGroupGenUtil { private static final long LONG_VECTOR_NULL_VALUE = 1; private static final double DOUBLE_VECTOR_NULL_VALUE = Double.NaN; public static VectorizedRowBatch getVectorizedRowBatch(int size, int numCol, int seed) { VectorizedRowBatch vrg = new VectorizedRowBatch(numCol, size); for (int j = 0; j < numCol; j++) { LongColumnVector lcv = new LongColumnVector(size); for (int i = 0; i < size; i++) { lcv.vector[i] = (i+1) * seed * (j+1); } vrg.cols[j] = lcv; } vrg.size = size; return vrg; } public static LongColumnVector generateLongColumnVector( boolean nulls, boolean repeating, int size, Random rand) { LongColumnVector lcv = new LongColumnVector(size); lcv.noNulls = !nulls; lcv.isRepeating = repeating; long repeatingValue; do{ repeatingValue= rand.nextLong(); }while(repeatingValue == 0); int nullFrequency = generateNullFrequency(rand); for(int i = 0; i < size; i++) { if(nulls && (repeating || i % nullFrequency == 0)) { lcv.isNull[i] = true; lcv.vector[i] = LONG_VECTOR_NULL_VALUE; }else { lcv.isNull[i] = false; lcv.vector[i] = repeating ? repeatingValue : rand.nextLong(); if(lcv.vector[i] == 0) { i--; } } } return lcv; } public static TimestampColumnVector generateTimestampColumnVector( boolean nulls, boolean repeating, int size, Random rand, Timestamp[] timestampValues) { TimestampColumnVector tcv = new TimestampColumnVector(size); tcv.noNulls = !nulls; tcv.isRepeating = repeating; Timestamp repeatingTimestamp = RandomTypeUtil.getRandTimestamp(rand); int nullFrequency = generateNullFrequency(rand); for(int i = 0; i < size; i++) { if(nulls && (repeating || i % nullFrequency == 0)) { tcv.isNull[i] = true; tcv.setNullValue(i); timestampValues[i] = null; }else { tcv.isNull[i] = false; if (!repeating) { Timestamp randomTimestamp = RandomTypeUtil.getRandTimestamp(rand); tcv.set(i, randomTimestamp); timestampValues[i] = randomTimestamp; } else { tcv.set(i, repeatingTimestamp); timestampValues[i] = repeatingTimestamp; } } } return tcv; } public static DoubleColumnVector generateDoubleColumnVector(boolean nulls, boolean repeating, int size, Random rand) { DoubleColumnVector dcv = new DoubleColumnVector(size); dcv.noNulls = !nulls; dcv.isRepeating = repeating; double repeatingValue; do{ repeatingValue= rand.nextDouble(); }while(repeatingValue == 0); int nullFrequency = generateNullFrequency(rand); for(int i = 0; i < size; i++) { if(nulls && (repeating || i % nullFrequency == 0)) { dcv.isNull[i] = true; dcv.vector[i] = DOUBLE_VECTOR_NULL_VALUE; }else { dcv.isNull[i] = false; dcv.vector[i] = repeating ? repeatingValue : rand.nextDouble(); if(dcv.vector[i] == 0) { i--; } } } return dcv; } public static DecimalColumnVector generateDecimalColumnVector(DecimalTypeInfo typeInfo, boolean nulls, boolean repeating, int size, Random rand) { DecimalColumnVector dcv = new DecimalColumnVector(size, typeInfo.precision(), typeInfo.scale()); dcv.noNulls = !nulls; dcv.isRepeating = repeating; HiveDecimalWritable repeatingValue = new HiveDecimalWritable(); do{ repeatingValue.set(HiveDecimal.create(((Double) rand.nextDouble()).toString()).setScale((short)typeInfo.scale(), HiveDecimal.ROUND_HALF_UP)); }while(repeatingValue.getHiveDecimal().doubleValue() == 0); int nullFrequency = generateNullFrequency(rand); for(int i = 0; i < size; i++) { if(nulls && (repeating || i % nullFrequency == 0)) { dcv.isNull[i] = true; dcv.vector[i] = null; }else { dcv.isNull[i] = false; if (repeating) { dcv.vector[i].set(repeatingValue); } else { dcv.vector[i].set(HiveDecimal.create(((Double) rand.nextDouble()).toString()).setScale((short) typeInfo.scale(), HiveDecimal.ROUND_HALF_UP)); } if(dcv.vector[i].getHiveDecimal().doubleValue() == 0) { i--; } } } return dcv; } private static int generateNullFrequency(Random rand) { return 60 + rand.nextInt(20); } }