/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.junit.Assert;
import org.junit.Test;

/**
 * Test cases for the vectorized filter operator.
 *
 * Fundamental logic and performance tests for vector filters belong here.
 * For tests that cover specific operator and data type combinations, see
 * the other filter tests under org.apache.hadoop.hive.ql.exec.vector.expressions.
 */
public class TestVectorFilterOperator {

  HiveConf hconf = new HiveConf();

  /**
   * Produces a fixed total number of rows as VectorizedRowBatch instances,
   * with each column filled with random long values.
   */
  public static class FakeDataReader {
    private final int size;
    private final VectorizedRowBatch vrg;
    private int currentSize = 0;
    private final int numCols;
    private final int len = 1024;

    public FakeDataReader(int size, int numCols) {
      this.size = size;
      this.numCols = numCols;
      vrg = new VectorizedRowBatch(numCols, len);
      for (int i = 0; i < numCols; i++) {
        try {
          // Sleep between columns so the clock-derived random seed differs
          // per column.
          Thread.sleep(2);
        } catch (InterruptedException ignore) {
        }
        vrg.cols[i] = getLongVector(len);
      }
    }

    public VectorizedRowBatch getNext() {
      if (currentSize >= size) {
        // All rows have been produced; an empty batch signals end of data.
        vrg.size = 0;
        return vrg;
      } else {
        // Re-serve the same batch until the requested row count is reached.
        vrg.size = len;
        currentSize += vrg.size;
        vrg.selectedInUse = false;
        return vrg;
      }
    }

    private LongColumnVector getLongVector(int len) {
      LongColumnVector lcv = new LongColumnVector(len);
      TestVectorizedRowBatch.setRandomLongCol(lcv);
      return lcv;
    }
  }

  private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
    List<String> columns = new ArrayList<String>();
    columns.add("col1");
    FilterDesc fdesc = new FilterDesc();
    fdesc.setPredicate(col1Expr);

    Operator<? extends OperatorDesc> filterOp =
        OperatorFactory.get(new CompilationOpContext(), fdesc);

    VectorizationContext vc = new VectorizationContext("name", columns);

    return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(filterOp, vc);
  }

  @Test
  public void testBasicFilterOperator() throws HiveException {
    VectorFilterOperator vfo = getAVectorFilterOperator();
    vfo.initialize(hconf, null);

    // Replace the planned predicate with a hand-built vectorized condition:
    // (col0 > col1) AND (col2 == 0).
    VectorExpression ve1 = new FilterLongColGreaterLongColumn(0, 1);
    VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
    VectorExpression ve3 = new FilterExprAndExpr();
    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
    vfo.setFilterCondition(ve3);

    FakeDataReader fdr = new FakeDataReader(1024, 3);
    VectorizedRowBatch vrg = fdr.getNext();
    vfo.getConditionEvaluator().evaluate(vrg);

    // Verify: count the qualifying rows by hand and compare against the
    // number of rows the vectorized filter selected.
    LongColumnVector l1 = (LongColumnVector) vrg.cols[0];
    LongColumnVector l2 = (LongColumnVector) vrg.cols[1];
    LongColumnVector l3 = (LongColumnVector) vrg.cols[2];
    int rows = 0;
    for (int i = 0; i < 1024; i++) {
      if ((l1.vector[i] > l2.vector[i]) && (l3.vector[i] == 0)) {
        rows++;
      }
    }
    Assert.assertEquals(rows, vrg.size);
  }

  @Test
  public void testBasicFilterLargeData() throws HiveException {
    VectorFilterOperator vfo = getAVectorFilterOperator();
    vfo.initialize(hconf, null);
    VectorExpression ve1 = new FilterLongColGreaterLongColumn(0, 1);
    VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
    VectorExpression ve3 = new FilterExprAndExpr();
    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
    vfo.setFilterCondition(ve3);

    // Time the vectorized filter over 16M rows (16K batches of 1K rows).
    FakeDataReader fdr = new FakeDataReader(16 * 1024 * 1024, 3);
    long startTime = System.currentTimeMillis();
    VectorizedRowBatch vrg = fdr.getNext();
    while (vrg.size > 0) {
      vfo.process(vrg, 0);
      vrg = fdr.getNext();
    }
    long endTime = System.currentTimeMillis();
    System.out.println("testBasicFilterLargeData vectorized op time = " + (endTime - startTime));

    // Baseline: evaluate the same predicate row by row, revisiting the single
    // 1K-row batch 16K times to cover the same 16M rows.
    fdr = new FakeDataReader(16 * 1024 * 1024, 3);
    long startTime1 = System.currentTimeMillis();
    vrg = fdr.getNext();
    LongColumnVector l1 = (LongColumnVector) vrg.cols[0];
    LongColumnVector l2 = (LongColumnVector) vrg.cols[1];
    LongColumnVector l3 = (LongColumnVector) vrg.cols[2];
    int rows = 0;
    for (int j = 0; j < 16 * 1024; j++) {
      // All three columns share the batch length (1024).
      for (int i = 0; i < l1.vector.length; i++) {
        if ((l1.vector[i] > l2.vector[i]) && (l3.vector[i] == 0)) {
          rows++;
        }
      }
    }
    long endTime1 = System.currentTimeMillis();
    System.out.println("testBasicFilterLargeData row-by-row baseline time = " + (endTime1 - startTime1));
  }
}